summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author    Matt Arsenault <Matthew.Arsenault@amd.com>  2020-01-02 16:45:33 -0500
committer Matt Arsenault <arsenm2@gmail.com>          2020-01-09 19:52:24 -0500
commit    35c3d101aee240f6c034f25ff6800fda22a89987 (patch)
tree      05d1393ef43cbbdfdd2a63d6ed304e06b87876bc /llvm/lib/Target/AMDGPU
parent    5cabb8357aeb3bbecaef4825c3a594f86ef94c8d (diff)
download  bcm5719-llvm-35c3d101aee240f6c034f25ff6800fda22a89987.tar.gz
          bcm5719-llvm-35c3d101aee240f6c034f25ff6800fda22a89987.zip
AMDGPU/GlobalISel: Select G_EXTRACT_VECTOR_ELT
Doesn't try to do the fold into the base register of an add of a constant in the index like the DAG path does.
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 76
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h   |  1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp           |  9
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h             |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp            |  9
5 files changed, 89 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 72ccf0df4f3..5dacc0993fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1605,6 +1605,80 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const {
return true;
}
+bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
+ MachineInstr &MI) const {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register IdxReg = MI.getOperand(2).getReg();
+
+ LLT DstTy = MRI->getType(DstReg);
+ LLT SrcTy = MRI->getType(SrcReg);
+
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+ const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
+ const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
+
+ // The index must be scalar. If it wasn't RegBankSelect should have moved this
+ // into a waterfall loop.
+ if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
+ return false;
+
+ const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB,
+ *MRI);
+ const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB,
+ *MRI);
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
+ !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
+ return false;
+
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const bool Is64 = DstTy.getSizeInBits() == 64;
+
+ unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+
+ if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
+ if (DstTy.getSizeInBits() != 32 && !Is64)
+ return false;
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+
+ unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
+ BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
+ .addReg(SrcReg, 0, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)
+ return false;
+
+ if (!STI.useVGPRIndexMode()) {
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(IdxReg);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(IdxReg)
+ .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+ BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF));
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
@@ -1693,6 +1767,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_FRAME_INDEX(I);
case TargetOpcode::G_PTR_MASK:
return selectG_PTR_MASK(I);
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ return selectG_EXTRACT_VECTOR_ELT(I);
default:
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 633c4d35137..d884afbe770 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -116,6 +116,7 @@ private:
bool selectG_BRCOND(MachineInstr &I) const;
bool selectG_FRAME_INDEX(MachineInstr &I) const;
bool selectG_PTR_MASK(MachineInstr &I) const;
+ bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const;
std::pair<Register, unsigned>
selectVOP3ModsImpl(Register Src) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 46aea16a2be..16bde062b1d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -45,6 +45,11 @@ static cl::opt<bool> DisablePowerSched(
cl::desc("Disable scheduling to minimize mAI power bursts"),
cl::init(false));
+static cl::opt<bool> EnableVGPRIndexMode(
+ "amdgpu-vgpr-index-mode",
+ cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
+ cl::init(false));
+
GCNSubtarget::~GCNSubtarget() = default;
R600Subtarget &
@@ -561,6 +566,10 @@ bool GCNSubtarget::hasMadF16() const {
return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
}
+bool GCNSubtarget::useVGPRIndexMode() const {
+ return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode());
+}
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return getMaxWavesPerEU();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 08878d87fb0..b0188b003c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -941,9 +941,7 @@ public:
return HasVGPRIndexMode;
}
- bool useVGPRIndexMode(bool UserEnable) const {
- return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
- }
+ bool useVGPRIndexMode() const;
bool hasScalarCompareEq64() const {
return getGeneration() >= VOLCANIC_ISLANDS;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 79495961870..e73d87cd66a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -90,11 +90,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool> EnableVGPRIndexMode(
- "amdgpu-vgpr-index-mode",
- cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
- cl::init(false));
-
static cl::opt<bool> DisableLoopAlignment(
"amdgpu-disable-loop-alignment",
cl::desc("Do not align and prefetch loops"),
@@ -3415,7 +3410,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
- bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+ const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
MachineBasicBlock::iterator I(&MI);
@@ -3510,7 +3505,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
- bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
+ const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
OpenPOWER on IntegriCloud