summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp279
1 files changed, 12 insertions, 267 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 73486b969f4..217b3996996 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
-#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -246,6 +245,10 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
}
}
+static int64_t getConstant(const MachineInstr *MI) {
+ return MI->getOperand(1).getCImm()->getSExtValue();
+}
+
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
switch (Opc) {
case AMDGPU::G_AND:
@@ -734,260 +737,6 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
.addImm(Enabled);
}
-static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
- int64_t C;
- if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
- return true;
-
- // FIXME: matcher should ignore copies
- return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
-}
-
-static unsigned extractGLC(unsigned CachePolicy) {
- return CachePolicy & 1;
-}
-
-static unsigned extractSLC(unsigned CachePolicy) {
- return (CachePolicy >> 1) & 1;
-}
-
-static unsigned extractDLC(unsigned CachePolicy) {
- return (CachePolicy >> 2) & 1;
-}
-
-// Returns Base register, constant offset, and offset def point.
-static std::tuple<Register, unsigned, MachineInstr *>
-getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
- MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
- if (!Def)
- return {Reg, 0, nullptr};
-
- if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
- unsigned Offset;
- const MachineOperand &Op = Def->getOperand(1);
- if (Op.isImm())
- Offset = Op.getImm();
- else
- Offset = Op.getCImm()->getZExtValue();
-
- return {Register(), Offset, Def};
- }
-
- int64_t Offset;
- if (Def->getOpcode() == AMDGPU::G_ADD) {
- // TODO: Handle G_OR used for add case
- if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
- return {Def->getOperand(0).getReg(), Offset, Def};
-
- // FIXME: matcher should ignore copies
- if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
- return {Def->getOperand(0).getReg(), Offset, Def};
- }
-
- return {Reg, 0, Def};
-}
-
-static unsigned getBufferStoreOpcode(LLT Ty,
- const unsigned MemSize,
- const bool Offen) {
- const int Size = Ty.getSizeInBits();
- switch (8 * MemSize) {
- case 8:
- return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
- AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
- case 16:
- return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
- AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
- default:
- unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
- AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
- if (Size > 32)
- Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
- return Opc;
- }
-}
-
-static unsigned getBufferStoreFormatOpcode(LLT Ty,
- const unsigned MemSize,
- const bool Offen) {
- bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
- bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
- int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
-
- if (IsD16Packed) {
- switch (NumElts) {
- case 1:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
- case 2:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
- case 3:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
- case 4:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
- default:
- return -1;
- }
- }
-
- if (IsD16Unpacked) {
- switch (NumElts) {
- case 1:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
- case 2:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
- case 3:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
- case 4:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
- default:
- return -1;
- }
- }
-
- switch (NumElts) {
- case 1:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
- case 2:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
- case 3:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
- case 4:
- return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
- AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
- default:
- return -1;
- }
-
- llvm_unreachable("unhandled buffer store");
-}
-
-// TODO: Move this to combiner
-// Returns base register, imm offset, total constant offset.
-std::tuple<Register, unsigned, unsigned>
-AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
- Register OrigOffset) const {
- const unsigned MaxImm = 4095;
- Register BaseReg;
- unsigned TotalConstOffset;
- MachineInstr *OffsetDef;
- MachineRegisterInfo &MRI = *B.getMRI();
-
- std::tie(BaseReg, TotalConstOffset, OffsetDef)
- = getBaseWithConstantOffset(MRI, OrigOffset);
-
- unsigned ImmOffset = TotalConstOffset;
-
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.f
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
- unsigned Overflow = ImmOffset & ~MaxImm;
- ImmOffset -= Overflow;
- if ((int32_t)Overflow < 0) {
- Overflow += ImmOffset;
- ImmOffset = 0;
- }
-
- if (Overflow != 0) {
- // In case this is in a waterfall loop, insert offset code at the def point
- // of the offset, not inside the loop.
- MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
- MachineBasicBlock &OldMBB = B.getMBB();
- B.setInstr(*OffsetDef);
-
- if (!BaseReg) {
- BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- B.buildInstr(AMDGPU::V_MOV_B32_e32)
- .addDef(BaseReg)
- .addImm(Overflow);
- } else {
- Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- B.buildInstr(AMDGPU::V_MOV_B32_e32)
- .addDef(OverflowVal)
- .addImm(Overflow);
-
- Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
- .addReg(BaseReg)
- .addReg(OverflowVal, RegState::Kill)
- .addImm(0);
- BaseReg = NewBaseReg;
- }
-
- B.setInsertPt(OldMBB, OldInsPt);
- }
-
- return {BaseReg, ImmOffset, TotalConstOffset};
-}
-
-bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
- bool IsFormat) const {
- MachineIRBuilder B(MI);
- MachineRegisterInfo &MRI = *B.getMRI();
- MachineFunction &MF = B.getMF();
- Register VData = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(VData);
-
- int Size = Ty.getSizeInBits();
- if (Size % 32 != 0)
- return false;
-
- // FIXME: Verifier should enforce 1 MMO for these intrinsics.
- MachineMemOperand *MMO = *MI.memoperands_begin();
- const int MemSize = MMO->getSize();
-
- Register RSrc = MI.getOperand(2).getReg();
- Register VOffset = MI.getOperand(3).getReg();
- Register SOffset = MI.getOperand(4).getReg();
- unsigned CachePolicy = MI.getOperand(5).getImm();
- unsigned ImmOffset;
- unsigned TotalOffset;
-
- std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
- if (TotalOffset != 0)
- MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
-
- const bool Offen = !isZero(VOffset, MRI);
-
- int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
- getBufferStoreOpcode(Ty, MemSize, Offen);
- if (Opc == -1)
- return false;
-
- MachineInstrBuilder MIB = B.buildInstr(Opc)
- .addUse(VData);
-
- if (Offen)
- MIB.addUse(VOffset);
-
- MIB.addUse(RSrc)
- .addUse(SOffset)
- .addImm(ImmOffset)
- .addImm(extractGLC(CachePolicy))
- .addImm(extractSLC(CachePolicy))
- .addImm(0) // tfe: FIXME: Remove from inst
- .addImm(extractDLC(CachePolicy))
- .addMemOperand(MMO);
-
- MI.eraseFromParent();
-
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@@ -997,10 +746,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
- int64_t Tgt = I.getOperand(1).getImm();
- int64_t Enabled = I.getOperand(2).getImm();
- int64_t Done = I.getOperand(7).getImm();
- int64_t VM = I.getOperand(8).getImm();
+ int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
+ int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
+ int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
+ int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
I.getOperand(4).getReg(),
@@ -1013,13 +762,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
}
case Intrinsic::amdgcn_exp_compr: {
const DebugLoc &DL = I.getDebugLoc();
- int64_t Tgt = I.getOperand(1).getImm();
- int64_t Enabled = I.getOperand(2).getImm();
+ int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
+ int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
Register Reg0 = I.getOperand(3).getReg();
Register Reg1 = I.getOperand(4).getReg();
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- int64_t Done = I.getOperand(5).getImm();
- int64_t VM = I.getOperand(6).getImm();
+ int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
+ int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
@@ -1042,10 +791,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
return true;
}
- case Intrinsic::amdgcn_raw_buffer_store:
- return selectStoreIntrinsic(I, false);
- case Intrinsic::amdgcn_raw_buffer_store_format:
- return selectStoreIntrinsic(I, true);
default:
return selectImpl(I, *CoverageInfo);
}
OpenPOWER on IntegriCloud