author      Matt Arsenault <Matthew.Arsenault@amd.com>   2019-09-19 16:26:14 +0000
committer   Matt Arsenault <Matthew.Arsenault@amd.com>   2019-09-19 16:26:14 +0000
commit      3ecab8e4555aee0b4aa10c413696a67f55948c39 (patch)
tree        312b6fd8b3a9ebc14217e7e19a00d428e3f3f8ff /llvm/lib/Target/AMDGPU
parent      e0900f285bb532790ed494df901f87c5c8b904da (diff)
Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"
This reverts r372314, reapplying r372285 and the commits which depend on it (r372286-r372293, and r372296-r372297). This was missing one switch to getTargetConstant in an untested case.

llvm-svn: 372338
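The substance of the reapplied change reduces to two small patterns, sketched below for orientation. This is a minimal illustration assuming an LLVM tree of roughly this revision; the helper names, includes, and operand choices are illustrative rather than part of the patch.

// SelectionDAG side: immarg intrinsic operands are built with
// getTargetConstant, which instruction selection keeps as an immediate,
// instead of getConstant, which may be materialized into a register.
// GlobalISel side: the immediate is read directly from the intrinsic's
// MachineOperand rather than by chasing the vreg def of a G_CONSTANT
// (the getConstant() helper removed from AMDGPUInstructionSelector.cpp).
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildCachePolicy(SelectionDAG &DAG, const SDLoc &DL,
                                unsigned Glc, unsigned Slc) {
  // was: return DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32);
  return DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32);
}

static int64_t readImmargOperand(const MachineInstr &MI, unsigned OpNo) {
  // was: MRI.getVRegDef(MI.getOperand(OpNo).getReg())
  //          ->getOperand(1).getCImm()->getSExtValue();
  return MI.getOperand(OpNo).getImm();
}

The rest of the diff is largely the mechanical fallout: TableGen patterns switch imm to timm for immarg operands, and the GlobalISel selector, legalizer, and register-bank code read the immediates directly.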
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp  279
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h      7
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp          60
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h             5
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp      399
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h         40
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp               2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h                 4
-rw-r--r--  llvm/lib/Target/AMDGPU/BUFInstructions.td              110
-rw-r--r--  llvm/lib/Target/AMDGPU/DSInstructions.td                 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp              104
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td                12
-rw-r--r--  llvm/lib/Target/AMDGPU/SOPInstructions.td               14
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td              18
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP3Instructions.td              44
15 files changed, 912 insertions, 188 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 217b3996996..73486b969f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -245,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
}
}
-static int64_t getConstant(const MachineInstr *MI) {
- return MI->getOperand(1).getCImm()->getSExtValue();
-}
-
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
switch (Opc) {
case AMDGPU::G_AND:
@@ -737,6 +734,260 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
.addImm(Enabled);
}
+static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
+ int64_t C;
+ if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
+ return true;
+
+ // FIXME: matcher should ignore copies
+ return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
+}
+
+static unsigned extractGLC(unsigned CachePolicy) {
+ return CachePolicy & 1;
+}
+
+static unsigned extractSLC(unsigned CachePolicy) {
+ return (CachePolicy >> 1) & 1;
+}
+
+static unsigned extractDLC(unsigned CachePolicy) {
+ return (CachePolicy >> 2) & 1;
+}
+
+// Returns Base register, constant offset, and offset def point.
+static std::tuple<Register, unsigned, MachineInstr *>
+getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+ if (!Def)
+ return {Reg, 0, nullptr};
+
+ if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
+ unsigned Offset;
+ const MachineOperand &Op = Def->getOperand(1);
+ if (Op.isImm())
+ Offset = Op.getImm();
+ else
+ Offset = Op.getCImm()->getZExtValue();
+
+ return {Register(), Offset, Def};
+ }
+
+ int64_t Offset;
+ if (Def->getOpcode() == AMDGPU::G_ADD) {
+ // TODO: Handle G_OR used for add case
+ if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
+ return {Def->getOperand(0).getReg(), Offset, Def};
+
+ // FIXME: matcher should ignore copies
+ if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
+ return {Def->getOperand(0).getReg(), Offset, Def};
+ }
+
+ return {Reg, 0, Def};
+}
+
+static unsigned getBufferStoreOpcode(LLT Ty,
+ const unsigned MemSize,
+ const bool Offen) {
+ const int Size = Ty.getSizeInBits();
+ switch (8 * MemSize) {
+ case 8:
+ return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
+ case 16:
+ return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
+ default:
+ unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
+ if (Size > 32)
+ Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
+ return Opc;
+ }
+}
+
+static unsigned getBufferStoreFormatOpcode(LLT Ty,
+ const unsigned MemSize,
+ const bool Offen) {
+ bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
+ bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
+ int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
+
+ if (IsD16Packed) {
+ switch (NumElts) {
+ case 1:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
+ case 2:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
+ case 3:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
+ case 4:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
+ default:
+ return -1;
+ }
+ }
+
+ if (IsD16Unpacked) {
+ switch (NumElts) {
+ case 1:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
+ case 2:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
+ case 3:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
+ case 4:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
+ default:
+ return -1;
+ }
+ }
+
+ switch (NumElts) {
+ case 1:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
+ case 2:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
+ case 3:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
+ case 4:
+ return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
+ default:
+ return -1;
+ }
+
+ llvm_unreachable("unhandled buffer store");
+}
+
+// TODO: Move this to combiner
+// Returns base register, imm offset, total constant offset.
+std::tuple<Register, unsigned, unsigned>
+AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
+ Register OrigOffset) const {
+ const unsigned MaxImm = 4095;
+ Register BaseReg;
+ unsigned TotalConstOffset;
+ MachineInstr *OffsetDef;
+ MachineRegisterInfo &MRI = *B.getMRI();
+
+ std::tie(BaseReg, TotalConstOffset, OffsetDef)
+ = getBaseWithConstantOffset(MRI, OrigOffset);
+
+ unsigned ImmOffset = TotalConstOffset;
+
+ // If the immediate value is too big for the immoffset field, put the value
+ // and -4096 into the immoffset field so that the value that is copied/added
+ // for the voffset field is a multiple of 4096, and it stands more chance
+ // of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if that is a
+ // negative number, as it appears to be illegal to have a negative offset
+ // in the vgpr, even if adding the immediate offset makes it positive.
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+
+ if (Overflow != 0) {
+ // In case this is in a waterfall loop, insert offset code at the def point
+ // of the offset, not inside the loop.
+ MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
+ MachineBasicBlock &OldMBB = B.getMBB();
+ B.setInstr(*OffsetDef);
+
+ if (!BaseReg) {
+ BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(BaseReg)
+ .addImm(Overflow);
+ } else {
+ Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ B.buildInstr(AMDGPU::V_MOV_B32_e32)
+ .addDef(OverflowVal)
+ .addImm(Overflow);
+
+ Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
+ .addReg(BaseReg)
+ .addReg(OverflowVal, RegState::Kill)
+ .addImm(0);
+ BaseReg = NewBaseReg;
+ }
+
+ B.setInsertPt(OldMBB, OldInsPt);
+ }
+
+ return {BaseReg, ImmOffset, TotalConstOffset};
+}
+
+bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
+ bool IsFormat) const {
+ MachineIRBuilder B(MI);
+ MachineRegisterInfo &MRI = *B.getMRI();
+ MachineFunction &MF = B.getMF();
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(VData);
+
+ int Size = Ty.getSizeInBits();
+ if (Size % 32 != 0)
+ return false;
+
+ // FIXME: Verifier should enforce 1 MMO for these intrinsics.
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+ const int MemSize = MMO->getSize();
+
+ Register RSrc = MI.getOperand(2).getReg();
+ Register VOffset = MI.getOperand(3).getReg();
+ Register SOffset = MI.getOperand(4).getReg();
+ unsigned CachePolicy = MI.getOperand(5).getImm();
+ unsigned ImmOffset;
+ unsigned TotalOffset;
+
+ std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
+ if (TotalOffset != 0)
+ MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
+
+ const bool Offen = !isZero(VOffset, MRI);
+
+ int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
+ getBufferStoreOpcode(Ty, MemSize, Offen);
+ if (Opc == -1)
+ return false;
+
+ MachineInstrBuilder MIB = B.buildInstr(Opc)
+ .addUse(VData);
+
+ if (Offen)
+ MIB.addUse(VOffset);
+
+ MIB.addUse(RSrc)
+ .addUse(SOffset)
+ .addImm(ImmOffset)
+ .addImm(extractGLC(CachePolicy))
+ .addImm(extractSLC(CachePolicy))
+ .addImm(0) // tfe: FIXME: Remove from inst
+ .addImm(extractDLC(CachePolicy))
+ .addMemOperand(MMO);
+
+ MI.eraseFromParent();
+
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@@ -746,10 +997,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
- int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
- int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
- int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
- int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
+ int64_t Tgt = I.getOperand(1).getImm();
+ int64_t Enabled = I.getOperand(2).getImm();
+ int64_t Done = I.getOperand(7).getImm();
+ int64_t VM = I.getOperand(8).getImm();
MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
I.getOperand(4).getReg(),
@@ -762,13 +1013,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
}
case Intrinsic::amdgcn_exp_compr: {
const DebugLoc &DL = I.getDebugLoc();
- int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
- int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
+ int64_t Tgt = I.getOperand(1).getImm();
+ int64_t Enabled = I.getOperand(2).getImm();
Register Reg0 = I.getOperand(3).getReg();
Register Reg1 = I.getOperand(4).getReg();
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
- int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
+ int64_t Done = I.getOperand(5).getImm();
+ int64_t VM = I.getOperand(6).getImm();
BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
@@ -791,6 +1042,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
return true;
}
+ case Intrinsic::amdgcn_raw_buffer_store:
+ return selectStoreIntrinsic(I, false);
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ return selectStoreIntrinsic(I, true);
default:
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 67a5062ca39..8c6e8976194 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -35,6 +35,7 @@ class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
class GCNSubtarget;
class MachineInstr;
+class MachineIRBuilder;
class MachineOperand;
class MachineRegisterInfo;
class SIInstrInfo;
@@ -82,6 +83,12 @@ private:
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
bool selectG_INSERT(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
+
+ std::tuple<Register, unsigned, unsigned>
+ splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
+
+ bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
+
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
bool selectG_ICMP(MachineInstr &I) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index abfb4525935..02b3b421026 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1751,6 +1751,62 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
return true;
}
+/// Handle register layout difference for f16 images for some subtargets.
+Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register Reg) const {
+ if (!ST.hasUnpackedD16VMem())
+ return Reg;
+
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ LLT StoreVT = MRI.getType(Reg);
+ assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
+
+ auto Unmerge = B.buildUnmerge(S16, Reg);
+
+ SmallVector<Register, 4> WideRegs;
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
+
+ int NumElts = StoreVT.getNumElements();
+
+ return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
+}
+
+bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ bool IsFormat) const {
+ // TODO: Reject f16 format on targets where unsupported.
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(VData);
+
+ B.setInstr(MI);
+
+ const LLT S32 = LLT::scalar(32);
+ const LLT S16 = LLT::scalar(16);
+
+ // Fixup illegal register types for i8 stores.
+ if (Ty == LLT::scalar(8) || Ty == S16) {
+ Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
+ MI.getOperand(1).setReg(AnyExt);
+ return true;
+ }
+
+ if (Ty.isVector()) {
+ if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
+ if (IsFormat)
+ MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
+ return true;
+ }
+
+ return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
+ }
+
+ return Ty == S32;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1843,6 +1899,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::amdgcn_raw_buffer_store:
+ return legalizeRawBufferStore(MI, MRI, B, false);
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ return legalizeRawBufferStore(MI, MRI, B, true);
default:
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 99564a04dbb..855444fa276 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -83,6 +83,11 @@ public:
MachineIRBuilder &B) const;
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, unsigned AddrSpace) const;
+
+ Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register Reg) const;
+ bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, bool IsFormat) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 92d5a5d07c7..0032d046862 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -33,6 +34,7 @@
#include "AMDGPUGenRegisterBankInfo.def"
using namespace llvm;
+using namespace MIPatternMatch;
namespace {
@@ -84,9 +86,11 @@ public:
};
}
-AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
+AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
: AMDGPUGenRegisterBankInfo(),
- TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
+ Subtarget(ST),
+ TRI(Subtarget.getRegisterInfo()),
+ TII(Subtarget.getInstrInfo()) {
// HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false;
@@ -638,8 +642,10 @@ static LLT getHalfSizedType(LLT Ty) {
///
/// There is additional complexity to try for compare values to identify the
/// unique values used.
-void AMDGPURegisterBankInfo::executeInWaterfallLoop(
- MachineInstr &MI, MachineRegisterInfo &MRI,
+bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineIRBuilder &B,
+ MachineInstr &MI,
+ MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const {
MachineFunction *MF = MI.getParent()->getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
@@ -662,9 +668,8 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
// No operands need to be replaced, so no need to loop.
if (SGPROperandRegs.empty())
- return;
+ return false;
- MachineIRBuilder B(MI);
SmallVector<Register, 4> ResultRegs;
SmallVector<Register, 4> InitResultRegs;
SmallVector<Register, 4> PhiRegs;
@@ -922,6 +927,18 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.buildInstr(AMDGPU::S_MOV_B64_term)
.addDef(AMDGPU::EXEC)
.addReg(SaveExecReg);
+
+ // Restore the insert point before the original instruction.
+ B.setInsertPt(MBB, MBB.end());
+
+ return true;
+}
+
+bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
+ MachineInstr &MI, MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const {
+ MachineIRBuilder B(MI);
+ return executeInWaterfallLoop(B, MI, MRI, OpIndices);
}
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
@@ -1031,6 +1048,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
return true;
}
+bool AMDGPURegisterBankInfo::applyMappingImage(
+ MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI, int RsrcIdx) const {
+ const int NumDefs = MI.getNumExplicitDefs();
+
+ // The reported argument index is relative to the IR intrinsic call arguments,
+ // so we need to shift by the number of defs and the intrinsic ID.
+ RsrcIdx += NumDefs + 1;
+
+ // Insert copies to VGPR arguments.
+ applyDefaultMapping(OpdMapper);
+
+ // Fixup any SGPR arguments.
+ SmallVector<unsigned, 4> SGPRIndexes;
+ for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
+ if (!MI.getOperand(I).isReg())
+ continue;
+
+ // If this intrinsic has a sampler, it immediately follows rsrc.
+ if (I == RsrcIdx || I == RsrcIdx + 1)
+ SGPRIndexes.push_back(I);
+ }
+
+ executeInWaterfallLoop(MI, MRI, SGPRIndexes);
+ return true;
+}
+
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static void substituteSimpleCopyRegs(
@@ -1042,6 +1086,184 @@ static void substituteSimpleCopyRegs(
}
}
+/// Handle register layout difference for f16 images for some subtargets.
+Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
+ MachineRegisterInfo &MRI,
+ Register Reg) const {
+ if (!Subtarget.hasUnpackedD16VMem())
+ return Reg;
+
+ const LLT S16 = LLT::scalar(16);
+ LLT StoreVT = MRI.getType(Reg);
+ if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
+ return Reg;
+
+ auto Unmerge = B.buildUnmerge(S16, Reg);
+
+
+ SmallVector<Register, 4> WideRegs;
+ for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
+ WideRegs.push_back(Unmerge.getReg(I));
+
+ const LLT S32 = LLT::scalar(32);
+ int NumElts = StoreVT.getNumElements();
+
+ return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
+}
+
+static std::pair<Register, unsigned>
+getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
+ int64_t Const;
+ if (mi_match(Reg, MRI, m_ICst(Const)))
+ return std::make_pair(Register(), Const);
+
+ Register Base;
+ if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
+ return std::make_pair(Base, Const);
+
+ // TODO: Handle G_OR used for add case
+ return std::make_pair(Reg, 0);
+}
+
+std::pair<Register, unsigned>
+AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
+ Register OrigOffset) const {
+ const unsigned MaxImm = 4095;
+ Register BaseReg;
+ unsigned ImmOffset;
+ const LLT S32 = LLT::scalar(32);
+
+ std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
+ OrigOffset);
+
+ unsigned C1 = 0;
+ if (ImmOffset != 0) {
+ // If the immediate value is too big for the immoffset field, put the value
+ // and -4096 into the immoffset field so that the value that is copied/added
+ // for the voffset field is a multiple of 4096, and it stands more chance
+ // of being CSEd with the copy/add for another similar load/store.
+ // However, do not do that rounding down to a multiple of 4096 if that is a
+ // negative number, as it appears to be illegal to have a negative offset
+ // in the vgpr, even if adding the immediate offset makes it positive.
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ if ((int32_t)Overflow < 0) {
+ Overflow += ImmOffset;
+ ImmOffset = 0;
+ }
+
+ C1 = ImmOffset;
+ if (Overflow != 0) {
+ if (!BaseReg)
+ BaseReg = B.buildConstant(S32, Overflow).getReg(0);
+ else {
+ auto OverflowVal = B.buildConstant(S32, Overflow);
+ BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
+ }
+ }
+ }
+
+ if (!BaseReg)
+ BaseReg = B.buildConstant(S32, 0).getReg(0);
+
+ return {BaseReg, C1};
+}
+
+static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
+ int64_t C;
+ return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
+}
+
+static unsigned extractGLC(unsigned CachePolicy) {
+ return CachePolicy & 1;
+}
+
+static unsigned extractSLC(unsigned CachePolicy) {
+ return (CachePolicy >> 1) & 1;
+}
+
+static unsigned extractDLC(unsigned CachePolicy) {
+ return (CachePolicy >> 2) & 1;
+}
+
+MachineInstr *
+AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
+ MachineInstr &MI) const {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ executeInWaterfallLoop(B, MI, MRI, {2, 4});
+
+ // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
+
+ Register VData = MI.getOperand(1).getReg();
+ LLT Ty = MRI.getType(VData);
+
+ int EltSize = Ty.getScalarSizeInBits();
+ int Size = Ty.getSizeInBits();
+
+ // FIXME: Broken integer truncstore.
+ if (EltSize != 32)
+ report_fatal_error("unhandled intrinsic store");
+
+ // FIXME: Verifier should enforce 1 MMO for these intrinsics.
+ const int MemSize = (*MI.memoperands_begin())->getSize();
+
+
+ Register RSrc = MI.getOperand(2).getReg();
+ Register VOffset = MI.getOperand(3).getReg();
+ Register SOffset = MI.getOperand(4).getReg();
+ unsigned CachePolicy = MI.getOperand(5).getImm();
+
+ unsigned ImmOffset;
+ std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
+
+ const bool Offen = !isZero(VOffset, MRI);
+
+ unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
+ switch (8 * MemSize) {
+ case 8:
+ Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
+ break;
+ case 16:
+ Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
+ break;
+ default:
+ Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
+ AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
+ if (Size > 32)
+ Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
+ break;
+ }
+
+
+ // Set the insertion point back to the instruction in case it was moved into a
+ // loop.
+ B.setInstr(MI);
+
+ MachineInstrBuilder MIB = B.buildInstr(Opc)
+ .addUse(VData);
+
+ if (Offen)
+ MIB.addUse(VOffset);
+
+ MIB.addUse(RSrc)
+ .addUse(SOffset)
+ .addImm(ImmOffset)
+ .addImm(extractGLC(CachePolicy))
+ .addImm(extractSLC(CachePolicy))
+ .addImm(0) // tfe: FIXME: Remove from inst
+ .addImm(extractDLC(CachePolicy))
+ .cloneMemRefs(MI);
+
+ // FIXME: We need a way to report failure from applyMappingImpl.
+ // Insert constrain copies before inserting the loop.
+ if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
+ report_fatal_error("failed to constrain selected store intrinsic");
+
+ return MIB;
+}
+
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -1405,7 +1627,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
+ auto IntrID = MI.getIntrinsicID();
+ switch (IntrID) {
case Intrinsic::amdgcn_buffer_load: {
executeInWaterfallLoop(MI, MRI, { 2 });
return;
@@ -1424,9 +1647,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
- default:
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {2, 4});
+ return;
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
+ applyDefaultMapping(OpdMapper);
+ executeInWaterfallLoop(MI, MRI, {2, 5});
+ return;
+ }
+ default: {
+ if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
+ AMDGPU::lookupRsrcIntrinsic(IntrID)) {
+ // Non-images can have complications from operands that allow both SGPR
+ // and VGPR. For now it's too complicated to figure out the final opcode
+ // to derive the register bank from the MCInstrDesc.
+ if (RSrcIntrin->IsImage) {
+ applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
+ return;
+ }
+ }
+
break;
}
+ }
break;
}
case AMDGPU::G_LOAD:
@@ -1532,6 +1785,45 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
}
const RegisterBankInfo::InstructionMapping &
+AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ int RsrcIdx) const {
+ // The reported argument index is relative to the IR intrinsic call arguments,
+ // so we need to shift by the number of defs and the intrinsic ID.
+ RsrcIdx += MI.getNumExplicitDefs() + 1;
+
+ const int NumOps = MI.getNumOperands();
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);
+
+ // TODO: Should packed/unpacked D16 difference be reported here as part of
+ // the value mapping?
+ for (int I = 0; I != NumOps; ++I) {
+ if (!MI.getOperand(I).isReg())
+ continue;
+
+ Register OpReg = MI.getOperand(I).getReg();
+ unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
+
+ // FIXME: Probably need a new intrinsic register bank searchable table to
+ // handle arbitrary intrinsics easily.
+ //
+ // If this has a sampler, it immediately follows rsrc.
+ const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
+
+ if (MustBeSGPR) {
+ // This must be an SGPR, so we must report whatever it is as legal.
+ unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
+ } else {
+ // Some operands must be VGPR, and these are easy to copy to.
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ }
+ }
+
+ return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
+}
+
+const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
@@ -1577,11 +1869,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
return Bank ? Bank->getID() : Default;
}
+
static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
}
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ // Lie and claim anything is legal, even though this needs to be an SGPR;
+ // applyMapping will have to deal with it as a waterfall loop.
+ unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ return AMDGPU::getValueMapping(Bank, Size);
+}
+
+const RegisterBankInfo::ValueMapping *
+AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const {
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+}
+
///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@@ -1748,7 +2060,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLVM_FALLTHROUGH;
}
-
case AMDGPU::G_GEP:
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
@@ -1764,8 +2075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE:
- case AMDGPU::G_UMULH:
- case AMDGPU::G_SMULH:
case AMDGPU::G_SMIN:
case AMDGPU::G_SMAX:
case AMDGPU::G_UMIN:
@@ -1799,6 +2108,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI);
+ case AMDGPU::G_UMULH:
+ case AMDGPU::G_SMULH: {
+ if (MF.getSubtarget<GCNSubtarget>().hasScalarMulHiInsts() &&
+ isSALUMapping(MI))
+ return getDefaultMappingSOP(MI);
+ return getDefaultMappingVOP(MI);
+ }
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@@ -2072,6 +2388,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm:
return getDefaultMappingVOP(MI);
+ case Intrinsic::amdgcn_ds_swizzle:
case Intrinsic::amdgcn_ds_permute:
case Intrinsic::amdgcn_ds_bpermute:
case Intrinsic::amdgcn_update_dpp:
@@ -2193,9 +2510,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
- switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
- default:
- return getInvalidInstructionMapping();
+ auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
+ switch (IntrID) {
case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime:
case Intrinsic::amdgcn_s_memrealtime:
@@ -2235,18 +2551,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_exp:
- OpdsMapping[0] = nullptr; // IntrinsicID
- // FIXME: These are immediate values which can't be read from registers.
- OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
// FIXME: Could we support packed types here?
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
- // FIXME: These are immediate values which can't be read from registers.
- OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
- OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_buffer_load: {
Register RSrc = MI.getOperand(2).getReg(); // SGPR
@@ -2298,6 +2607,54 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
break;
}
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_tbuffer_load: {
+ // FIXME: Should make intrinsic ID the last operand of the instruction,
+ // then this would be the same as store
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_tbuffer_load: {
+ OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+ break;
+ }
+ case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
+ OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
+ break;
+ }
+ default:
+ if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
+ AMDGPU::lookupRsrcIntrinsic(IntrID)) {
+ // Non-images can have complications from operands that allow both SGPR
+ // and VGPR. For now it's too complicated to figure out the final opcode
+ // to derive the register bank from the MCInstrDesc.
+ if (RSrcIntrin->IsImage)
+ return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
+ }
+
+ return getInvalidInstructionMapping();
}
break;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index f3a96e2a612..584b23c0c22 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -23,7 +23,9 @@
namespace llvm {
class LLT;
+class GCNSubtarget;
class MachineIRBuilder;
+class SIInstrInfo;
class SIRegisterInfo;
class TargetRegisterInfo;
@@ -36,9 +38,15 @@ protected:
#include "AMDGPUGenRegisterBank.inc"
};
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
+ const GCNSubtarget &Subtarget;
const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
- void executeInWaterfallLoop(MachineInstr &MI,
+ bool executeInWaterfallLoop(MachineIRBuilder &B,
+ MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ArrayRef<unsigned> OpIndices) const;
+ bool executeInWaterfallLoop(MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const;
@@ -47,6 +55,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool applyMappingWideLoad(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI) const;
+ bool
+ applyMappingImage(MachineInstr &MI,
+ const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
+ MachineRegisterInfo &MRI, int RSrcIdx) const;
+
+ Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ Register Reg) const;
+
+ std::pair<Register, unsigned>
+ splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
+
+ MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
+ MachineInstr &MI) const;
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
@@ -58,6 +79,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const;
+ // Return a value mapping for an operand that is required to be an SGPR.
+ const ValueMapping *getSGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
+ // Return a value mapping for an operand that is required to be a VGPR.
+ const ValueMapping *getVGPROpMapping(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) const;
+
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
/// Regs. This appropriately sets the regbank of the new registers.
void split64BitValueForMapping(MachineIRBuilder &B,
@@ -90,8 +121,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingAllVGPR(
const MachineInstr &MI) const;
+
+ const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI,
+ int RsrcIdx) const;
+
public:
- AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
+ AMDGPURegisterBankInfo(const GCNSubtarget &STI);
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 3e556b46fa5..b1069d4a319 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
- RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
+ RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
InstSelector.reset(new AMDGPUInstructionSelector(
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index c9d305a5bba..bf7cf86bc42 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -555,6 +555,10 @@ public:
return GFX9Insts;
}
+ bool hasScalarMulHiInsts() const {
+ return GFX9Insts;
+ }
+
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 1af12721b64..40887a3c56e 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm<imm, [{
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
@@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_glc $cachepolicy),
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_glc $cachepolicy),
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0)),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm)),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1404,8 +1404,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- 0, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ 0, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1416,8 +1416,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, 0),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1428,8 +1428,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- i32:$voffset, i32:$soffset, imm:$offset,
- imm:$cachepolicy, imm),
+ i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@@ -1642,32 +1642,32 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0)),
+ (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, imm)),
+ (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
@@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in {
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, imm),
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
- (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
- imm:$format, imm:$cachepolicy, 0),
+ (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
+ timm:$format, timm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
@@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
- imm:$offset, imm:$format, imm:$cachepolicy, imm),
+ timm:$offset, timm:$format, timm:$cachepolicy, timm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e1d53ae71a8..86c2db92acb 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
//===----------------------------------------------------------------------===//
def : GCNPat <
- (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+ (int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e459a3a4e0d..690114297d6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
SDValue Ops[] = {
- DAG.getEntryNode(), // Chain
- Rsrc, // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- {}, // voffset
- {}, // soffset
- {}, // offset
- DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getEntryNode(), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ {}, // voffset
+ {}, // soffset
+ {}, // offset
+ DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
// Use the alignment to ensure that the required offsets will fit into the
@@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
- Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
+ Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
Ops, LoadVT, MMO));
}
@@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
- DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
S, // Src2 - holds two f16 values selected by high
- DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(4), // high
- DAG.getConstant(0, DL, MVT::i1), // $clamp
- DAG.getConstant(0, DL, MVT::i32) // $omod
+ DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+ DAG.getTargetConstant(0, DL, MVT::i32) // $omod
};
return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
} else {
@@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
- DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(4), // high
- DAG.getConstant(0, DL, MVT::i1), // $clamp
- DAG.getConstant(0, DL, MVT::i32), // $omod
+ DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
+ DAG.getTargetConstant(0, DL, MVT::i32), // $omod
Glue
};
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
@@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2), // Src0
Op.getOperand(3), // Attrchan
Op.getOperand(4), // Attr
- DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(1), // Src2
- DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
+ DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(5), // high
- DAG.getConstant(0, DL, MVT::i1), // $clamp
+ DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
Glue
};
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
@@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
@@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
- DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.second, // offset
Op.getOperand(5), // format
Op.getOperand(6), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
@@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
@@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
@@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
EVT VT = Op.getValueType();
@@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(7), // soffset
Offsets.second, // offset
Op.getOperand(8), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // voffset
Op.getOperand(6), // soffset
Op.getOperand(7), // offset
- DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
+ DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.second, // offset
Op.getOperand(7), // format
Op.getOperand(8), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idexen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idexen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
@@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
- DAG.getConstant(0, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
@@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
- DAG.getConstant(1, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
@@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
- DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
- DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
+ DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getOperand(2).getValueType();
@@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
Overflow += ImmOffset;
ImmOffset = 0;
}
- C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+ C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
if (Overflow) {
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
if (!N0)
@@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
if (!N0)
N0 = DAG.getConstant(0, DL, MVT::i32);
if (!C1)
- C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+ C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
return {N0, SDValue(C1, 0)};
}
@@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
- Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
@@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Subtarget, Align)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
- Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
+ Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
- Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
+ Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
}
// Handle 8 bit and 16 bit buffer loads
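
Throughout the SIISelLowering.cpp hunks above, operands that must end up as instruction immediates (cachepolicy, idxen, format, and the split buffer offsets) switch from DAG.getConstant to DAG.getTargetConstant. A TargetConstant node is never legalized or materialized into a register, so the selector can only fold it into an immediate operand field, which is what immarg intrinsic arguments require. The sketch below is a minimal illustration of that split, not code from this patch; the helper name and operand layout are assumptions for the example.

```cpp
// Minimal sketch (hypothetical helper, not from this patch): building operands
// for a MUBUF-style buffer-load intrinsic node. Operands that may live in
// registers use getConstant/plain SDValues; fields that must be encoded as
// instruction immediates use getTargetConstant so they survive selection as
// immediates instead of being materialized into registers.
#include "llvm/CodeGen/SelectionDAG.h"

static llvm::SDValue buildBufferLoadNode(llvm::SelectionDAG &DAG,
                                         const llvm::SDLoc &DL, unsigned Opc,
                                         llvm::SDValue Chain, llvm::SDValue Rsrc,
                                         llvm::SDValue VOffset,
                                         unsigned ImmOffset, unsigned CachePolicy,
                                         bool IdxEn, llvm::SDVTList VTList,
                                         llvm::EVT MemVT,
                                         llvm::MachineMemOperand *MMO) {
  llvm::SDValue Ops[] = {
      Chain,
      Rsrc,                                                   // resource descriptor
      DAG.getConstant(0, DL, llvm::MVT::i32),                 // vindex (register value)
      VOffset,                                                // voffset (register value)
      DAG.getConstant(0, DL, llvm::MVT::i32),                 // soffset (register value)
      DAG.getTargetConstant(ImmOffset, DL, llvm::MVT::i32),   // offset: immediate field
      DAG.getTargetConstant(CachePolicy, DL, llvm::MVT::i32), // cachepolicy: immediate field
      DAG.getTargetConstant(IdxEn, DL, llvm::MVT::i1),        // idxen: immediate field
  };
  return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT, MMO);
}
```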
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3ae0da3545c..3c0cc0051c6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
- (i32 imm:$attr)))]
+ [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
+ (i32 timm:$attr)))]
>;
let OtherPredicates = [has32BankLDS] in {
@@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
- (i32 imm:$attr)))]>;
+ [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
+ (i32 timm:$attr)))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
- [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
- (i32 imm:$attr)))]>;
+ [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
+ (i32 timm:$attr)))]>;
} // End Uses = [M0, EXEC]
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0eb01434deb..2cd4e1cbc07 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
- [(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
+ [(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
// maximum really 15 on VI?
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
- "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
+ "s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
let Uses = [EXEC, M0] in {
// FIXME: Should this be mayLoad+mayStore?
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
- [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
+ [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
- [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
+ [(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
} // End Uses = [EXEC, M0]
@@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;
}
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
- [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
+ [(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
}
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
- [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
+ [(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
// S_GETREG_B32 Intrinsic Pattern.
//===----------------------------------------------------------------------===//
def : GCNPat <
- (int_amdgcn_s_getreg imm:$simm16),
+ (int_amdgcn_s_getreg timm:$simm16),
(S_GETREG_B32 (as_i16imm $simm16))
>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 76abda9218f..bea0c7bd080 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
let OtherPredicates = [isGFX8GFX9] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
- imm:$bound_ctrl)),
+ (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
(V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
>;
def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
- imm:$bank_mask, imm:$bound_ctrl)),
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
(V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
@@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
+ (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
>;
def : GCNPat <
- (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
- imm:$bound_ctrl)),
+ (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
+ timm:$bound_ctrl)),
(V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl), (i32 0))
>;
def : GCNPat <
- (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
- imm:$bank_mask, imm:$bound_ctrl)),
+ (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
+ timm:$bank_mask, timm:$bound_ctrl)),
(V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl), (i32 0))
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 753f63d3a74..605425972b1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
- imm:$cbsz, imm:$abid, imm:$blgp))];
+ timm:$cbsz, timm:$abid, timm:$blgp))];
}
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
@@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
- [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
- (i32 imm:$attr),
- (i32 imm:$src0_modifiers),
+ [(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i32 timm:$src0_modifiers),
(f32 VRegSrc_32:$src2),
- (i32 imm:$src2_modifiers),
- (i1 imm:$high),
- (i1 imm:$clamp)))]>;
+ (i32 timm:$src2_modifiers),
+ (i1 timm:$high),
+ (i1 timm:$clamp)))]>;
} // End Uses = [M0, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1
@@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
- [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
- (i32 imm:$attr),
- (i32 imm:$src0_modifiers),
- (i1 imm:$high),
- (i1 imm:$clamp),
- (i32 imm:$omod)))]>;
+ [(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i32 timm:$src0_modifiers),
+ (i1 timm:$high),
+ (i1 timm:$clamp),
+ (i32 timm:$omod)))]>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
- [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
- (i32 imm:$attr),
- (i32 imm:$src0_modifiers),
+ [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
+ (i32 timm:$attr),
+ (i32 timm:$src0_modifiers),
(f32 VRegSrc_32:$src2),
- (i32 imm:$src2_modifiers),
- (i1 imm:$high),
- (i1 imm:$clamp),
- (i32 imm:$omod)))]>;
+ (i32 timm:$src2_modifiers),
+ (i1 timm:$high),
+ (i1 timm:$clamp),
+ (i32 timm:$omod)))]>;
} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
def : GCNPat<
- (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+ (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;
def : GCNPat<
- (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
+ (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;
} // End SubtargetPredicate = isGFX10Plus
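
The TableGen hunks make the matching change on the pattern side: `imm` matches an ISD::Constant node, while `timm` matches ISD::TargetConstant, so once the lowering above emits TargetConstant for immarg operands the selection patterns must use `timm` (the UIMM16bit/SIMM16bit operands in these patterns are likewise replaced by plain `timm`). The snippet below is only a sketch of that distinction as hypothetical C++ checks; it is not part of this patch.

```cpp
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Hypothetical helpers, not from this patch: the TableGen 'imm' operator
// matches ISD::Constant, while 'timm' matches ISD::TargetConstant, which is
// what getTargetConstant / immarg lowering now produces for these operands.
static bool matchedByTImm(const llvm::SDValue &Op) {
  return Op.getOpcode() == llvm::ISD::TargetConstant;
}

static bool matchedByImm(const llvm::SDValue &Op) {
  return Op.getOpcode() == llvm::ISD::Constant;
}
```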