diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2020-01-02 16:45:33 -0500 |
|---|---|---|
| committer | Matt Arsenault <arsenm2@gmail.com> | 2020-01-09 19:52:24 -0500 |
| commit | 35c3d101aee240f6c034f25ff6800fda22a89987 (patch) | |
| tree | 05d1393ef43cbbdfdd2a63d6ed304e06b87876bc /llvm/lib/Target/AMDGPU | |
| parent | 5cabb8357aeb3bbecaef4825c3a594f86ef94c8d (diff) | |
| download | bcm5719-llvm-35c3d101aee240f6c034f25ff6800fda22a89987.tar.gz bcm5719-llvm-35c3d101aee240f6c034f25ff6800fda22a89987.zip | |
AMDGPU/GlobalISel: Select G_EXTRACT_VECTOR_ELT
This does not yet try to fold a constant added to the index into the
base register, as the DAG path does.
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 76 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 |
5 files changed, 89 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 72ccf0df4f3..5dacc0993fc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1605,6 +1605,80 @@ bool AMDGPUInstructionSelector::selectG_PTR_MASK(MachineInstr &I) const { return true; } +bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( + MachineInstr &MI) const { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register IdxReg = MI.getOperand(2).getReg(); + + LLT DstTy = MRI->getType(DstReg); + LLT SrcTy = MRI->getType(SrcReg); + + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); + const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI); + + // The index must be scalar. If it wasn't RegBankSelect should have moved this + // into a waterfall loop. + if (IdxRB->getID() != AMDGPU::SGPRRegBankID) + return false; + + const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB, + *MRI); + const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB, + *MRI); + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || + !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI)) + return false; + + MachineBasicBlock *BB = MI.getParent(); + const DebugLoc &DL = MI.getDebugLoc(); + const bool Is64 = DstTy.getSizeInBits() == 64; + + unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0; + + if (SrcRB->getID() == AMDGPU::SGPRRegBankID) { + if (DstTy.getSizeInBits() != 32 && !Is64) + return false; + + BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) + .addReg(IdxReg); + + unsigned Opc = Is64 ? 
AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32; + BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg) + .addReg(SrcReg, 0, SubReg) + .addReg(SrcReg, RegState::Implicit); + MI.eraseFromParent(); + return true; + } + + if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32) + return false; + + if (!STI.useVGPRIndexMode()) { + BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) + .addReg(IdxReg); + BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg) + .addReg(SrcReg, RegState::Undef, SubReg) + .addReg(SrcReg, RegState::Implicit); + MI.eraseFromParent(); + return true; + } + + BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_ON)) + .addReg(IdxReg) + .addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE); + BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), DstReg) + .addReg(SrcReg, RegState::Undef, SubReg) + .addReg(SrcReg, RegState::Implicit) + .addReg(AMDGPU::M0, RegState::Implicit); + BuildMI(*BB, MI, DL, TII.get(AMDGPU::S_SET_GPR_IDX_OFF)); + + MI.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::select(MachineInstr &I) { if (I.isPHI()) return selectPHI(I); @@ -1693,6 +1767,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectG_FRAME_INDEX(I); case TargetOpcode::G_PTR_MASK: return selectG_PTR_MASK(I); + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + return selectG_EXTRACT_VECTOR_ELT(I); default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 633c4d35137..d884afbe770 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -116,6 +116,7 @@ private: bool selectG_BRCOND(MachineInstr &I) const; bool selectG_FRAME_INDEX(MachineInstr &I) const; bool selectG_PTR_MASK(MachineInstr &I) const; + bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const; std::pair<Register, unsigned> selectVOP3ModsImpl(Register Src) const; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 46aea16a2be..16bde062b1d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -45,6 +45,11 @@ static cl::opt<bool> DisablePowerSched( cl::desc("Disable scheduling to minimize mAI power bursts"), cl::init(false)); +static cl::opt<bool> EnableVGPRIndexMode( + "amdgpu-vgpr-index-mode", + cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), + cl::init(false)); + GCNSubtarget::~GCNSubtarget() = default; R600Subtarget & @@ -561,6 +566,10 @@ bool GCNSubtarget::hasMadF16() const { return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1; } +bool GCNSubtarget::useVGPRIndexMode() const { + return !hasMovrel() || (EnableVGPRIndexMode && hasVGPRIndexMode()); +} + unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { if (getGeneration() >= AMDGPUSubtarget::GFX10) return getMaxWavesPerEU(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 08878d87fb0..b0188b003c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -941,9 +941,7 @@ public: return HasVGPRIndexMode; } - bool useVGPRIndexMode(bool UserEnable) const { - return !hasMovrel() || (UserEnable && hasVGPRIndexMode()); - } + bool useVGPRIndexMode() const; bool hasScalarCompareEq64() const { return getGeneration() >= VOLCANIC_ISLANDS; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 79495961870..e73d87cd66a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -90,11 +90,6 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt<bool> EnableVGPRIndexMode( - "amdgpu-vgpr-index-mode", - cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), - cl::init(false)); - static 
cl::opt<bool> DisableLoopAlignment( "amdgpu-disable-loop-alignment", cl::desc("Do not align and prefetch loops"), @@ -3415,7 +3410,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset); - bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode); + const bool UseGPRIdxMode = ST.useVGPRIndexMode(); if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) { MachineBasicBlock::iterator I(&MI); @@ -3510,7 +3505,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC, SrcVec->getReg(), Offset); - bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode); + const bool UseGPRIdxMode = ST.useVGPRIndexMode(); if (Idx->getReg() == AMDGPU::NoRegister) { MachineBasicBlock::iterator I(&MI); |

