author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-19 16:26:14 +0000
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-19 16:26:14 +0000
commit | 3ecab8e4555aee0b4aa10c413696a67f55948c39 (patch)
tree | 312b6fd8b3a9ebc14217e7e19a00d428e3f3f8ff /llvm/lib/Target/AMDGPU
parent | e0900f285bb532790ed494df901f87c5c8b904da (diff)
download | bcm5719-llvm-3ecab8e4555aee0b4aa10c413696a67f55948c39.tar.gz, bcm5719-llvm-3ecab8e4555aee0b4aa10c413696a67f55948c39.zip
Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This reverts r372314, reapplying r372285 and the commits which depend
on it (r372286-r372293 and r372296-r372297).
This was missing one switch to getTargetConstant in an untested case.
llvm-svn: 372338
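In short, the GlobalISel path now leaves immarg intrinsic operands as immediates on the G_INTRINSIC machine instructions instead of materializing them as G_CONSTANT virtual registers, and the SelectionDAG path builds the corresponding operands with getTargetConstant. A minimal sketch of the resulting pattern, adapted from the amdgcn_exp and buffer-intrinsic hunks in the diff below (assume `I`, `DAG`, `DL`, and `CachePolicy` are in scope as in the surrounding LLVM code; this is an illustrative excerpt, not a standalone compilable unit):

```cpp
// Previously the selector chased the operand's defining G_CONSTANT:
//   int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
// With immarg operands kept as immediates, it reads them directly:
int64_t Tgt     = I.getOperand(1).getImm();
int64_t Enabled = I.getOperand(2).getImm();

// On the SelectionDAG side, operands that must remain instruction
// immediates are built as TargetConstants; a plain getConstant node
// would be selected into a register materialization:
SDValue CachePolicyOp = DAG.getTargetConstant(CachePolicy, DL, MVT::i32);
```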
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 279
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 7
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 60
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 5
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 399
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 40
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 110
-rw-r--r-- | llvm/lib/Target/AMDGPU/DSInstructions.td | 2
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 104
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 12
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 14
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP1Instructions.td | 18
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 44
15 files changed, 912 insertions, 188 deletions
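Much of the new selector and register-bank code in the diff below revolves around splitting a raw buffer offset into the 12-bit MUBUF immediate field and an overflow part that is added to the voffset register. A standalone sketch of just that arithmetic (the helper name and driver are illustrative, not part of the patch; the constants mirror the MaxImm = 4095 logic in the new splitBufferOffsets helpers):

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>

// Split a constant buffer offset into {overflow for voffset, 12-bit imm}.
static std::pair<uint32_t, uint32_t> splitBufferOffset(uint32_t TotalConstOffset) {
  const uint32_t MaxImm = 4095;             // MUBUF immediate offset field
  uint32_t ImmOffset = TotalConstOffset;
  uint32_t Overflow  = ImmOffset & ~MaxImm; // multiple of 4096
  ImmOffset -= Overflow;
  if ((int32_t)Overflow < 0) {
    // A negative voffset is illegal, so fold everything into voffset instead.
    Overflow += ImmOffset;
    ImmOffset = 0;
  }
  return {Overflow, ImmOffset};
}

int main() {
  for (uint32_t Off : {100u, 4095u, 4096u, 5000u, 0x80000100u}) {
    auto [VOff, Imm] = splitBufferOffset(Off);
    std::printf("offset %u -> voffset %u + imm %u\n", Off, VOff, Imm);
  }
  return 0;
}
```

The sign check reflects the comment carried in the patch: rounding the overflow down to a multiple of 4096 gives the voffset add a better chance of being CSE'd across similar loads and stores, but the rounding is skipped when it would leave a negative value, since a negative voffset is not allowed even if the immediate offset would bring the total back into range.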
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 217b3996996..73486b969f4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -245,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, } } -static int64_t getConstant(const MachineInstr *MI) { - return MI->getOperand(1).getCImm()->getSExtValue(); -} - static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { switch (Opc) { case AMDGPU::G_AND: @@ -737,6 +734,260 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt, .addImm(Enabled); } +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + if (mi_match(Reg, MRI, m_ICst(C)) && C == 0) + return true; + + // FIXME: matcher should ignore copies + return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +// Returns Base register, constant offset, and offset def point. +static std::tuple<Register, unsigned, MachineInstr *> +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); + if (!Def) + return {Reg, 0, nullptr}; + + if (Def->getOpcode() == AMDGPU::G_CONSTANT) { + unsigned Offset; + const MachineOperand &Op = Def->getOperand(1); + if (Op.isImm()) + Offset = Op.getImm(); + else + Offset = Op.getCImm()->getZExtValue(); + + return {Register(), Offset, Def}; + } + + int64_t Offset; + if (Def->getOpcode() == AMDGPU::G_ADD) { + // TODO: Handle G_OR used for add case + if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset))) + return {Def->getOperand(0).getReg(), Offset, Def}; + + // FIXME: matcher should ignore copies + if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset)))) + return {Def->getOperand(0).getReg(), Offset, Def}; + } + + return {Reg, 0, Def}; +} + +static unsigned getBufferStoreOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + const int Size = Ty.getSizeInBits(); + switch (8 * MemSize) { + case 8: + return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + case 16: + return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + default: + unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + return Opc; + } +} + +static unsigned getBufferStoreFormatOpcode(LLT Ty, + const unsigned MemSize, + const bool Offen) { + bool IsD16Packed = Ty.getScalarSizeInBits() == 16; + bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits(); + int NumElts = Ty.isVector() ? Ty.getNumElements() : 1; + + if (IsD16Packed) { + switch (NumElts) { + case 1: + return Offen ? 
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact; + default: + return -1; + } + } + + if (IsD16Unpacked) { + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact; + default: + return -1; + } + } + + switch (NumElts) { + case 1: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact; + case 2: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact; + case 3: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact; + case 4: + return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact : + AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact; + default: + return -1; + } + + llvm_unreachable("unhandled buffer store"); +} + +// TODO: Move this to combiner +// Returns base register, imm offset, total constant offset. +std::tuple<Register, unsigned, unsigned> +AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned TotalConstOffset; + MachineInstr *OffsetDef; + MachineRegisterInfo &MRI = *B.getMRI(); + + std::tie(BaseReg, TotalConstOffset, OffsetDef) + = getBaseWithConstantOffset(MRI, OrigOffset); + + unsigned ImmOffset = TotalConstOffset; + + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store.f + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + if (Overflow != 0) { + // In case this is in a waterfall loop, insert offset code at the def point + // of the offset, not inside the loop. 
+ MachineBasicBlock::iterator OldInsPt = B.getInsertPt(); + MachineBasicBlock &OldMBB = B.getMBB(); + B.setInstr(*OffsetDef); + + if (!BaseReg) { + BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(BaseReg) + .addImm(Overflow); + } else { + Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + B.buildInstr(AMDGPU::V_MOV_B32_e32) + .addDef(OverflowVal) + .addImm(Overflow); + + Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg) + .addReg(BaseReg) + .addReg(OverflowVal, RegState::Kill) + .addImm(0); + BaseReg = NewBaseReg; + } + + B.setInsertPt(OldMBB, OldInsPt); + } + + return {BaseReg, ImmOffset, TotalConstOffset}; +} + +bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI, + bool IsFormat) const { + MachineIRBuilder B(MI); + MachineRegisterInfo &MRI = *B.getMRI(); + MachineFunction &MF = B.getMF(); + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int Size = Ty.getSizeInBits(); + if (Size % 32 != 0) + return false; + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + MachineMemOperand *MMO = *MI.memoperands_begin(); + const int MemSize = MMO->getSize(); + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + unsigned ImmOffset; + unsigned TotalOffset; + + std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset); + if (TotalOffset != 0) + MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize); + + const bool Offen = !isZero(VOffset, MRI); + + int Opc = IsFormat ? 
getBufferStoreFormatOpcode(Ty, MemSize, Offen) : + getBufferStoreOpcode(Ty, MemSize, Offen); + if (Opc == -1) + return false; + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .addMemOperand(MMO); + + MI.eraseFromParent(); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); @@ -746,10 +997,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::amdgcn_exp: { - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); + int64_t Done = I.getOperand(7).getImm(); + int64_t VM = I.getOperand(8).getImm(); MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(), I.getOperand(4).getReg(), @@ -762,13 +1013,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( } case Intrinsic::amdgcn_exp_compr: { const DebugLoc &DL = I.getDebugLoc(); - int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg())); - int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg())); + int64_t Tgt = I.getOperand(1).getImm(); + int64_t Enabled = I.getOperand(2).getImm(); Register Reg0 = I.getOperand(3).getReg(); Register Reg1 = I.getOperand(4).getReg(); Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg())); - int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg())); + int64_t Done = I.getOperand(5).getImm(); + int64_t VM = I.getOperand(6).getImm(); BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef); MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM, @@ -791,6 +1042,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return selectStoreIntrinsic(I, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return selectStoreIntrinsic(I, true); default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 67a5062ca39..8c6e8976194 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -35,6 +35,7 @@ class AMDGPUInstrInfo; class AMDGPURegisterBankInfo; class GCNSubtarget; class MachineInstr; +class MachineIRBuilder; class MachineOperand; class MachineRegisterInfo; class SIInstrInfo; @@ -82,6 +83,12 @@ private: bool selectG_IMPLICIT_DEF(MachineInstr &I) const; bool selectG_INSERT(MachineInstr &I) const; bool selectG_INTRINSIC(MachineInstr &I) const; + + std::tuple<Register, unsigned, unsigned> + splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const; + + bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const; + bool 
selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; bool selectG_ICMP(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index abfb4525935..02b3b421026 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1751,6 +1751,62 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, return true; } +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!ST.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + LLT StoreVT = MRI.getType(Reg); + assert(StoreVT.isVector() && StoreVT.getElementType() == S16); + + auto Unmerge = B.buildUnmerge(S16, Reg); + + SmallVector<Register, 4> WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0)); + + int NumElts = StoreVT.getNumElements(); + + return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + bool IsFormat) const { + // TODO: Reject f16 format on targets where unsupported. + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + B.setInstr(MI); + + const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + + // Fixup illegal register types for i8 stores. + if (Ty == LLT::scalar(8) || Ty == S16) { + Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0); + MI.getOperand(1).setReg(AnyExt); + return true; + } + + if (Ty.isVector()) { + if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) { + if (IsFormat) + MI.getOperand(1).setReg(handleD16VData(B, MRI, VData)); + return true; + } + + return Ty.getElementType() == S32 && Ty.getNumElements() <= 4; + } + + return Ty == S32; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { @@ -1843,6 +1899,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MI.eraseFromParent(); return true; } + case Intrinsic::amdgcn_raw_buffer_store: + return legalizeRawBufferStore(MI, MRI, B, false); + case Intrinsic::amdgcn_raw_buffer_store_format: + return legalizeRawBufferStore(MI, MRI, B, true); default: return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 99564a04dbb..855444fa276 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -83,6 +83,11 @@ public: MachineIRBuilder &B) const; bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool IsFormat) const; bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 92d5a5d07c7..0032d046862 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -33,6 +34,7 @@ #include "AMDGPUGenRegisterBankInfo.def" using namespace llvm; +using namespace MIPatternMatch; namespace { @@ -84,9 +86,11 @@ public: }; } -AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterBankInfo(), - TRI(static_cast<const SIRegisterInfo*>(&TRI)) { + Subtarget(ST), + TRI(Subtarget.getRegisterInfo()), + TII(Subtarget.getInstrInfo()) { // HACK: Until this is fully tablegen'd. static bool AlreadyInit = false; @@ -638,8 +642,10 @@ static LLT getHalfSizedType(LLT Ty) { /// /// There is additional complexity to try for compare values to identify the /// unique values used. -void AMDGPURegisterBankInfo::executeInWaterfallLoop( - MachineInstr &MI, MachineRegisterInfo &MRI, +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const { MachineFunction *MF = MI.getParent()->getParent(); const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>(); @@ -662,9 +668,8 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( // No operands need to be replaced, so no need to loop. if (SGPROperandRegs.empty()) - return; + return false; - MachineIRBuilder B(MI); SmallVector<Register, 4> ResultRegs; SmallVector<Register, 4> InitResultRegs; SmallVector<Register, 4> PhiRegs; @@ -922,6 +927,18 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop( B.buildInstr(AMDGPU::S_MOV_B64_term) .addDef(AMDGPU::EXEC) .addReg(SaveExecReg); + + // Restore the insert point before the original instruction. + B.setInsertPt(MBB, MBB.end()); + + return true; +} + +bool AMDGPURegisterBankInfo::executeInWaterfallLoop( + MachineInstr &MI, MachineRegisterInfo &MRI, + ArrayRef<unsigned> OpIndices) const { + MachineIRBuilder B(MI); + return executeInWaterfallLoop(B, MI, MRI, OpIndices); } // Legalize an operand that must be an SGPR by inserting a readfirstlane. @@ -1031,6 +1048,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, return true; } +bool AMDGPURegisterBankInfo::applyMappingImage( + MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RsrcIdx) const { + const int NumDefs = MI.getNumExplicitDefs(); + + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += NumDefs + 1; + + // Insert copies to VGPR arguments. + applyDefaultMapping(OpdMapper); + + // Fixup any SGPR arguments. + SmallVector<unsigned, 4> SGPRIndexes; + for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + // If this intrinsic has a sampler, it immediately follows rsrc. + if (I == RsrcIdx || I == RsrcIdx + 1) + SGPRIndexes.push_back(I); + } + + executeInWaterfallLoop(MI, MRI, SGPRIndexes); + return true; +} + // For cases where only a single copy is inserted for matching register banks. 
// Replace the register in the instruction operand static void substituteSimpleCopyRegs( @@ -1042,6 +1086,184 @@ static void substituteSimpleCopyRegs( } } +/// Handle register layout difference for f16 images for some subtargets. +Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B, + MachineRegisterInfo &MRI, + Register Reg) const { + if (!Subtarget.hasUnpackedD16VMem()) + return Reg; + + const LLT S16 = LLT::scalar(16); + LLT StoreVT = MRI.getType(Reg); + if (!StoreVT.isVector() || StoreVT.getElementType() != S16) + return Reg; + + auto Unmerge = B.buildUnmerge(S16, Reg); + + + SmallVector<Register, 4> WideRegs; + for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I) + WideRegs.push_back(Unmerge.getReg(I)); + + const LLT S32 = LLT::scalar(32); + int NumElts = StoreVT.getNumElements(); + + return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0); +} + +static std::pair<Register, unsigned> +getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) { + int64_t Const; + if (mi_match(Reg, MRI, m_ICst(Const))) + return std::make_pair(Register(), Const); + + Register Base; + if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const)))) + return std::make_pair(Base, Const); + + // TODO: Handle G_OR used for add case + return std::make_pair(Reg, 0); +} + +std::pair<Register, unsigned> +AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B, + Register OrigOffset) const { + const unsigned MaxImm = 4095; + Register BaseReg; + unsigned ImmOffset; + const LLT S32 = LLT::scalar(32); + + std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(), + OrigOffset); + + unsigned C1 = 0; + if (ImmOffset != 0) { + // If the immediate value is too big for the immoffset field, put the value + // and -4096 into the immoffset field so that the value that is copied/added + // for the voffset field is a multiple of 4096, and it stands more chance + // of being CSEd with the copy/add for another similar load/store. + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. + unsigned Overflow = ImmOffset & ~MaxImm; + ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } + + C1 = ImmOffset; + if (Overflow != 0) { + if (!BaseReg) + BaseReg = B.buildConstant(S32, Overflow).getReg(0); + else { + auto OverflowVal = B.buildConstant(S32, Overflow); + BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0); + } + } + } + + if (!BaseReg) + BaseReg = B.buildConstant(S32, 0).getReg(0); + + return {BaseReg, C1}; +} + +static bool isZero(Register Reg, MachineRegisterInfo &MRI) { + int64_t C; + return mi_match(Reg, MRI, m_ICst(C)) && C == 0; +} + +static unsigned extractGLC(unsigned CachePolicy) { + return CachePolicy & 1; +} + +static unsigned extractSLC(unsigned CachePolicy) { + return (CachePolicy >> 1) & 1; +} + +static unsigned extractDLC(unsigned CachePolicy) { + return (CachePolicy >> 2) & 1; +} + +MachineInstr * +AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const { + MachineRegisterInfo &MRI = *B.getMRI(); + executeInWaterfallLoop(B, MI, MRI, {2, 4}); + + // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer. 
+ + Register VData = MI.getOperand(1).getReg(); + LLT Ty = MRI.getType(VData); + + int EltSize = Ty.getScalarSizeInBits(); + int Size = Ty.getSizeInBits(); + + // FIXME: Broken integer truncstore. + if (EltSize != 32) + report_fatal_error("unhandled intrinsic store"); + + // FIXME: Verifier should enforce 1 MMO for these intrinsics. + const int MemSize = (*MI.memoperands_begin())->getSize(); + + + Register RSrc = MI.getOperand(2).getReg(); + Register VOffset = MI.getOperand(3).getReg(); + Register SOffset = MI.getOperand(4).getReg(); + unsigned CachePolicy = MI.getOperand(5).getImm(); + + unsigned ImmOffset; + std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset); + + const bool Offen = !isZero(VOffset, MRI); + + unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact; + switch (8 * MemSize) { + case 8: + Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact : + AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact; + break; + case 16: + Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact : + AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact; + break; + default: + Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact : + AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact; + if (Size > 32) + Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32); + break; + } + + + // Set the insertion point back to the instruction in case it was moved into a + // loop. + B.setInstr(MI); + + MachineInstrBuilder MIB = B.buildInstr(Opc) + .addUse(VData); + + if (Offen) + MIB.addUse(VOffset); + + MIB.addUse(RSrc) + .addUse(SOffset) + .addImm(ImmOffset) + .addImm(extractGLC(CachePolicy)) + .addImm(extractSLC(CachePolicy)) + .addImm(0) // tfe: FIXME: Remove from inst + .addImm(extractDLC(CachePolicy)) + .cloneMemRefs(MI); + + // FIXME: We need a way to report failure from applyMappingImpl. + // Insert constrain copies before inserting the loop. + if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this)) + report_fatal_error("failed to constrain selected store intrinsic"); + + return MIB; +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -1405,7 +1627,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + auto IntrID = MI.getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_buffer_load: { executeInWaterfallLoop(MI, MRI, { 2 }); return; @@ -1424,9 +1647,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, 2); // M0 return; } - default: + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_buffer_load_format: + case Intrinsic::amdgcn_raw_tbuffer_load: + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 4}); + return; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_load: + case Intrinsic::amdgcn_struct_tbuffer_store: { + applyDefaultMapping(OpdMapper); + executeInWaterfallLoop(MI, MRI, {2, 5}); + return; + } + default: { + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
+ if (RSrcIntrin->IsImage) { + applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg); + return; + } + } + break; } + } break; } case AMDGPU::G_LOAD: @@ -1532,6 +1785,45 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { } const RegisterBankInfo::InstructionMapping & +AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const { + // The reported argument index is relative to the IR intrinsic call arguments, + // so we need to shift by the number of defs and the intrinsic ID. + RsrcIdx += MI.getNumExplicitDefs() + 1; + + const int NumOps = MI.getNumOperands(); + SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps); + + // TODO: Should packed/unpacked D16 difference be reported here as part of + // the value mapping? + for (int I = 0; I != NumOps; ++I) { + if (!MI.getOperand(I).isReg()) + continue; + + Register OpReg = MI.getOperand(I).getReg(); + unsigned Size = getSizeInBits(OpReg, MRI, *TRI); + + // FIXME: Probably need a new intrinsic register bank searchable table to + // handle arbitrary intrinsics easily. + // + // If this has a sampler, it immediately follows rsrc. + const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1; + + if (MustBeSGPR) { + // If this must be an SGPR, so we must report whatever it is as legal. + unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size); + } else { + // Some operands must be VGPR, and these are easy to copy to. + OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + } + } + + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps); +} + +const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); @@ -1577,11 +1869,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg, return Bank ? Bank->getID() : Default; } + static unsigned regBankUnion(unsigned RB0, unsigned RB1) { return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; } +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + // Lie and claim anything is legal, even though this needs to be an SGPR + // applyMapping will have to deal with it as a waterfall loop. 
+ unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID); + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(Bank, Size); +} + +const RegisterBankInfo::ValueMapping * +AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const { + unsigned Size = getSizeInBits(Reg, MRI, TRI); + return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); +} + /// /// This function must return a legal mapping, because /// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called @@ -1748,7 +2060,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { LLVM_FALLTHROUGH; } - case AMDGPU::G_GEP: case AMDGPU::G_ADD: case AMDGPU::G_SUB: @@ -1764,8 +2075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_SADDE: case AMDGPU::G_USUBE: case AMDGPU::G_SSUBE: - case AMDGPU::G_UMULH: - case AMDGPU::G_SMULH: case AMDGPU::G_SMIN: case AMDGPU::G_SMAX: case AMDGPU::G_UMIN: @@ -1799,6 +2108,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_INTRINSIC_TRUNC: case AMDGPU::G_INTRINSIC_ROUND: return getDefaultMappingVOP(MI); + case AMDGPU::G_UMULH: + case AMDGPU::G_SMULH: { + if (MF.getSubtarget<GCNSubtarget>().hasScalarMulHiInsts() && + isSALUMapping(MI)) + return getDefaultMappingSOP(MI); + return getDefaultMappingVOP(MI); + } case AMDGPU::G_IMPLICIT_DEF: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); @@ -2072,6 +2388,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_wwm: case Intrinsic::amdgcn_wqm: return getDefaultMappingVOP(MI); + case Intrinsic::amdgcn_ds_swizzle: case Intrinsic::amdgcn_ds_permute: case Intrinsic::amdgcn_ds_bpermute: case Intrinsic::amdgcn_update_dpp: @@ -2193,9 +2510,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { - default: - return getInvalidInstructionMapping(); + auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID(); + switch (IntrID) { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: case Intrinsic::amdgcn_s_memrealtime: @@ -2235,18 +2551,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_exp: - OpdsMapping[0] = nullptr; // IntrinsicID - // FIXME: These are immediate values which can't be read from registers. - OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); // FIXME: Could we support packed types here? OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); - // FIXME: These are immediate values which can't be read from registers. 
- OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); - OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; case Intrinsic::amdgcn_buffer_load: { Register RSrc = MI.getOperand(2).getReg(); // SGPR @@ -2298,6 +2607,54 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); break; } + case Intrinsic::amdgcn_raw_buffer_load: + case Intrinsic::amdgcn_raw_tbuffer_load: { + // FIXME: Should make intrinsic ID the last operand of the instruction, + // then this would be the same as store + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_raw_buffer_store: + case Intrinsic::amdgcn_raw_buffer_store_format: + case Intrinsic::amdgcn_raw_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_load: + case Intrinsic::amdgcn_struct_tbuffer_load: { + OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_struct_buffer_store: + case Intrinsic::amdgcn_struct_tbuffer_store: { + OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); + OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); + OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); + break; + } + default: + if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = + AMDGPU::lookupRsrcIntrinsic(IntrID)) { + // Non-images can have complications from operands that allow both SGPR + // and VGPR. For now it's too complicated to figure out the final opcode + // to derive the register bank from the MCInstrDesc. 
+ if (RSrcIntrin->IsImage) + return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg); + } + + return getInvalidInstructionMapping(); } break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f3a96e2a612..584b23c0c22 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -23,7 +23,9 @@ namespace llvm { class LLT; +class GCNSubtarget; class MachineIRBuilder; +class SIInstrInfo; class SIRegisterInfo; class TargetRegisterInfo; @@ -36,9 +38,15 @@ protected: #include "AMDGPUGenRegisterBank.inc" }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { + const GCNSubtarget &Subtarget; const SIRegisterInfo *TRI; + const SIInstrInfo *TII; - void executeInWaterfallLoop(MachineInstr &MI, + bool executeInWaterfallLoop(MachineIRBuilder &B, + MachineInstr &MI, + MachineRegisterInfo &MRI, + ArrayRef<unsigned> OpIndices) const; + bool executeInWaterfallLoop(MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const; @@ -47,6 +55,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { bool applyMappingWideLoad(MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const; + bool + applyMappingImage(MachineInstr &MI, + const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, + MachineRegisterInfo &MRI, int RSrcIdx) const; + + Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, + Register Reg) const; + + std::pair<Register, unsigned> + splitBufferOffsets(MachineIRBuilder &B, Register Offset) const; + + MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B, + MachineInstr &MI) const; /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -58,6 +79,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const TargetRegisterInfo &TRI, unsigned Default = AMDGPU::VGPRRegBankID) const; + // Return a value mapping for an operand that is required to be an SGPR. + const ValueMapping *getSGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + + // Return a value mapping for an operand that is required to be a VGPR. + const ValueMapping *getVGPROpMapping(Register Reg, + const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; + /// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p /// Regs. This appropriately sets the regbank of the new registers. 
void split64BitValueForMapping(MachineIRBuilder &B, @@ -90,8 +121,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const; const InstructionMapping &getDefaultMappingAllVGPR( const MachineInstr &MI) const; + + const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + int RsrcIdx) const; + public: - AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI); + AMDGPURegisterBankInfo(const GCNSubtarget &STI); unsigned copyCost(const RegisterBank &A, const RegisterBank &B, unsigned Size) const override; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 3e556b46fa5..b1069d4a319 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM)); - RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + RegBankInfo.reset(new AMDGPURegisterBankInfo(*this)); InstSelector.reset(new AMDGPUInstructionSelector( *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM)); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c9d305a5bba..bf7cf86bc42 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -555,6 +555,10 @@ public: return GFX9Insts; } + bool hasScalarMulHiInsts() const { + return GFX9Insts; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? 
TrapHandlerAbiHsa : TrapHandlerAbiNone; } diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 1af12721b64..40887a3c56e 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm<imm, [{ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), @@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">; multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # 
_BOTHEN_exact) $vdata, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0)), (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm)), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm)), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset, (as_i16imm $offset), (extract_slc $cachepolicy)) >; def : GCNPat< (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (!cast<MUBUF_Pseudo>(opcode # _BOTHEN) $vdata_in, (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), @@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1404,8 +1404,8 @@ def : GCNPat< def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, - 0, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + 0, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1416,8 +1416,8 @@ def : GCNPat< def : GCNPat< 
(SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, 0), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, 0), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1428,8 +1428,8 @@ def : GCNPat< def : GCNPat< (SIbuffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, - i32:$voffset, i32:$soffset, imm:$offset, - imm:$cachepolicy, imm), + i32:$voffset, i32:$soffset, timm:$offset, + timm:$cachepolicy, timm), (EXTRACT_SUBREG (BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1), @@ -1642,32 +1642,32 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, timm)), (!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0)), + (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, 0)), (!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm)), + (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, timm)), (!cast<MTBUF_Pseudo>(opcode # _BOTHEN) (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1), $rsrc, $soffset, (as_i16imm $offset), @@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in { multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt, string opcode> { def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, 0), + (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, 0), (!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, - imm:$format, imm:$cachepolicy, imm), + (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset, + timm:$format, timm:$cachepolicy, timm), (!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy)) >; def : GCNPat< - (name 
vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
-          imm:$format, imm:$cachepolicy, 0),
+    (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+          timm:$format, timm:$cachepolicy, 0),
     (!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, (as_i16imm $offset),
       (as_i8imm $format), (extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
@@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
   def : GCNPat<
     (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
-          imm:$offset, imm:$format, imm:$cachepolicy, imm),
+          timm:$offset, timm:$format, timm:$cachepolicy, timm),
     (!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
       $vdata,
       (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e1d53ae71a8..86c2db92acb 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
 //===----------------------------------------------------------------------===//
 def : GCNPat <
-  (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+  (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
   (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e459a3a4e0d..690114297d6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
   SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
   unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
   SDValue Ops[] = {
-      DAG.getEntryNode(),                         // Chain
-      Rsrc,                                       // rsrc
-      DAG.getConstant(0, DL, MVT::i32),           // vindex
-      {},                                         // voffset
-      {},                                         // soffset
-      {},                                         // offset
-      DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1),            // idxen
+      DAG.getEntryNode(),                               // Chain
+      Rsrc,                                             // rsrc
+      DAG.getConstant(0, DL, MVT::i32),                 // vindex
+      {},                                               // voffset
+      {},                                               // soffset
+      {},                                               // offset
+      DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(0, DL, MVT::i1),            // idxen
   };
   // Use the alignment to ensure that the required offsets will fit into the
@@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
   uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
   for (unsigned i = 0; i < NumLoads; ++i) {
-    Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+    Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
     Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
                                             Ops, LoadVT, MMO));
   }
@@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
         Op.getOperand(3), // Attr
-        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
         S, // Src2 - holds two f16 values selected by high
-        DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
         Op.getOperand(4), // high
-        DAG.getConstant(0, DL, MVT::i1), // $clamp
-        DAG.getConstant(0, DL, MVT::i32) // $omod
+        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+        DAG.getTargetConstant(0, DL, MVT::i32) // $omod
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
     } else {
@@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
         Op.getOperand(1), // Src0
         Op.getOperand(2), // Attrchan
         Op.getOperand(3), // Attr
-        DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+        DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
         Op.getOperand(4), // high
-        DAG.getConstant(0, DL, MVT::i1), // $clamp
-        DAG.getConstant(0, DL, MVT::i32), // $omod
+        DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+        DAG.getTargetConstant(0, DL, MVT::i32), // $omod
         Glue
       };
       return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
@@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       Op.getOperand(2), // Src0
       Op.getOperand(3), // Attrchan
       Op.getOperand(4), // Attr
-      DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+      DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
       Op.getOperand(1), // Src2
-      DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+      DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
       Op.getOperand(5), // high
-      DAG.getConstant(0, DL, MVT::i1), // $clamp
+      DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
       Glue
     };
     return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
@@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
@@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4), // soffset
       Offsets.second, // offset
       Op.getOperand(5), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second, // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4), // voffset
       Op.getOperand(5), // soffset
       Op.getOperand(6), // offset
-      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
     };
     if (LoadVT.getScalarType() == MVT::f16)
@@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(5), // format
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     if (LoadVT.getScalarType() == MVT::f16)
@@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(6), // format
      Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     if (LoadVT.getScalarType() == MVT::f16)
@@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     EVT VT = Op.getValueType();
@@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second, // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
@@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second, // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
@@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
     EVT VT = Op.getValueType();
@@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second, // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
     auto *M = cast<MemSDNode>(Op);
@@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(7), // soffset
       Offsets.second, // offset
       Op.getOperand(8), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     EVT VT = Op.getValueType();
     auto *M = cast<MemSDNode>(Op);
@@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5), // voffset
       Op.getOperand(6), // soffset
       Op.getOperand(7), // offset
-      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(7), // format
       Op.getOperand(8), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Offsets.second, // offset
       Op.getOperand(6), // format
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idexen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idexen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
@@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5), // soffset
       Offsets.second, // offset
       Op.getOperand(6), // cachepolicy
-      DAG.getConstant(0, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(0, DL, MVT::i1), // idxen
     };
     unsigned Opc = IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT :
                               AMDGPUISD::BUFFER_STORE;
@@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(6), // soffset
       Offsets.second, // offset
       Op.getOperand(7), // cachepolicy
-      DAG.getConstant(1, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(1, DL, MVT::i1), // idxen
     };
     unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
                    AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
@@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       SDValue(), // voffset -- will be set by setBufferOffsets
       SDValue(), // soffset -- will be set by setBufferOffsets
       SDValue(), // offset -- will be set by setBufferOffsets
-      DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
-      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+      DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+      DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
     };
     setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
     EVT VT = Op.getOperand(2).getValueType();
@@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
         Overflow += ImmOffset;
         ImmOffset = 0;
       }
-    C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
     if (Overflow) {
       auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
       if (!N0)
@@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
   if (!N0)
     N0 = DAG.getConstant(0, DL, MVT::i32);
   if (!C1)
-    C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+    C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
   return {N0, SDValue(C1, 0)};
 }
@@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
     if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
       Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
       return;
     }
   }
@@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
                                                 Subtarget, Align)) {
       Offsets[0] = N0;
       Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
-      Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+      Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
       return;
     }
   }
   Offsets[0] = CombinedOffset;
   Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
-  Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+  Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
 }
 // Handle 8 bit and 16 bit buffer loads
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3ae0da3545c..3c0cc0051c6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
-                                    (i32 imm:$attr)))]
+  [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
+                                    (i32 timm:$attr)))]
 >;
 let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
-                                    (i32 imm:$attr)))]>;
+  [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
+                                    (i32 timm:$attr)))]>;
 } // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
   (outs VINTRPDst:$vdst),
   (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
   "v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
-  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
-                                     (i32 imm:$attr)))]>;
+  [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
+                                     (i32 timm:$attr)))]>;
 } // End Uses = [M0, EXEC]
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0eb01434deb..2cd4e1cbc07 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
 let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
 def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
-    [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
+    [(int_amdgcn_s_waitcnt timm:$simm16)]>;
 def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
 def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
 // maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
 // maximum really 15 on VI?
 def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
-  "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
+  "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
@@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
 let Uses = [EXEC, M0] in {
 // FIXME: Should this be mayLoad+mayStore?
 def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
-  [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
+  [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
 def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
-  [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
+  [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
 } // End Uses = [EXEC, M0]
@@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
   let simm16 = 0;
 }
 def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
-  [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
+  [(int_amdgcn_s_incperflevel timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
 }
 def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
-  [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
+  [(int_amdgcn_s_decperflevel timm:$simm16)]> {
   let hasSideEffects = 1;
   let mayLoad = 1;
   let mayStore = 1;
@@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
 // S_GETREG_B32 Intrinsic Pattern.
 //===----------------------------------------------------------------------===//
 def : GCNPat <
-  (int_amdgcn_s_getreg imm:$simm16),
+  (int_amdgcn_s_getreg timm:$simm16),
   (S_GETREG_B32 (as_i16imm $simm16))
 >;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 76abda9218f..bea0c7bd080 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
 let OtherPredicates = [isGFX8GFX9] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
-                           imm:$bound_ctrl)),
+  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+                           timm:$bound_ctrl)),
   (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
                  (as_i32imm $row_mask), (as_i32imm $bank_mask),
                  (as_i1imm $bound_ctrl))
 >;
 def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
-                              imm:$bank_mask, imm:$bound_ctrl)),
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+                              timm:$bank_mask, timm:$bound_ctrl)),
   (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
                  (as_i32imm $row_mask), (as_i32imm $bank_mask),
                  (as_i1imm $bound_ctrl))
@@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
 let OtherPredicates = [isGFX10Plus] in {
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
   (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8),
                         (i32 DPP8Mode.FI_0))
 >;
 def : GCNPat <
-  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
-                           imm:$bound_ctrl)),
+  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+                           timm:$bound_ctrl)),
   (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl), (i32 0))
 >;
 def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
-                              imm:$bank_mask, imm:$bound_ctrl)),
+  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+                              timm:$bank_mask, timm:$bound_ctrl)),
   (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
                        (as_i32imm $row_mask), (as_i32imm $bank_mask),
                        (as_i1imm $bound_ctrl), (i32 0))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 753f63d3a74..605425972b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
 class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
-                                        imm:$cbsz, imm:$abid, imm:$blgp))];
+                                        timm:$cbsz, timm:$abid, timm:$blgp))];
 }
 class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
@@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
   def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
   let Uses = [M0, EXEC] in {
   def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
-       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
-                                             (i32 imm:$attr),
-                                             (i32 imm:$src0_modifiers),
+       [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
+                                             (i32 timm:$attr),
+                                             (i32 timm:$src0_modifiers),
                                              (f32 VRegSrc_32:$src2),
-                                             (i32 imm:$src2_modifiers),
-                                             (i1 imm:$high),
-                                             (i1 imm:$clamp)))]>;
+                                             (i32 timm:$src2_modifiers),
+                                             (i1 timm:$high),
+                                             (i1 timm:$clamp)))]>;
   } // End Uses = [M0, EXEC]
 } // End FPDPRounding = 1
 } // End renamedInGFX9 = 1
@@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
 let Uses = [M0, EXEC], FPDPRounding = 1 in {
 def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
-                                               (i32 imm:$attr),
-                                               (i32 imm:$src0_modifiers),
-                                               (i1 imm:$high),
-                                               (i1 imm:$clamp),
-                                               (i32 imm:$omod)))]>;
+       [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
+                                               (i32 timm:$attr),
+                                               (i32 timm:$src0_modifiers),
+                                               (i1 timm:$high),
+                                               (i1 timm:$clamp),
+                                               (i32 timm:$omod)))]>;
 def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
-       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
-                                               (i32 imm:$attr),
-                                               (i32 imm:$src0_modifiers),
+       [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
+                                               (i32 timm:$attr),
+                                               (i32 timm:$src0_modifiers),
                                                (f32 VRegSrc_32:$src2),
-                                               (i32 imm:$src2_modifiers),
-                                               (i1 imm:$high),
-                                               (i1 imm:$clamp),
-                                               (i32 imm:$omod)))]>;
+                                               (i32 timm:$src2_modifiers),
+                                               (i1 timm:$high),
+                                               (i1 timm:$clamp),
+                                               (i32 timm:$omod)))]>;
 } // End Uses = [M0, EXEC], FPDPRounding = 1
 } // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
   } // End $vdst = $vdst_in, DisableEncoding $vdst_in
   def : GCNPat<
-    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
     (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
   >;
   def : GCNPat<
-    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+    (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
     (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
   >;
 } // End SubtargetPredicate = isGFX10Plus
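Note on the recurring substitution above: operands that an intrinsic requires to stay immediate (immarg operands, matched by timm in TableGen patterns) have to be emitted as TargetConstant nodes during lowering, while ordinary values may still be built with DAG.getConstant and can legally end up in registers. The following is a minimal, hypothetical sketch of that idiom in C++; buildBufferOps and its operand layout are invented for illustration and are not a function from this patch.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/SelectionDAG.h"

    using namespace llvm;

    // Hypothetical helper: assemble operands for a buffer-style memory intrinsic.
    // CachePolicy and IdxEn stand in for immarg operands; vindex is ordinary data.
    static void buildBufferOps(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
                               SDValue Rsrc, unsigned CachePolicy, bool IdxEn,
                               SmallVectorImpl<SDValue> &Ops) {
      Ops.push_back(Chain);
      Ops.push_back(Rsrc);
      // Ordinary constant: the selector may materialize this value into a
      // register, so a plain Constant node is fine here.
      Ops.push_back(DAG.getConstant(0, DL, MVT::i32));                 // vindex
      // Immediate-only operands: TargetConstant nodes are matched by `timm`
      // and are never legalized or moved into registers.
      Ops.push_back(DAG.getTargetConstant(CachePolicy, DL, MVT::i32)); // cachepolicy
      Ops.push_back(DAG.getTargetConstant(IdxEn, DL, MVT::i1));        // idxen
    }

On the TableGen side the corresponding selection patterns then use timm:$cachepolicy rather than imm:$cachepolicy, which is the imm -> timm substitution visible throughout the .td hunks above.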