Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 153
1 file changed, 131 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 7cf5f802c09..a141a22c53f 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -14,6 +14,7 @@
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -441,6 +442,42 @@ static bool isUseSafeToFold(const SIInstrInfo *TII,
   //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
+// Find a def of the UseReg, check if it is a reg_sequence and find the
+// initializer for each subreg, tracking it down to a foldable inline
+// immediate if possible. Returns true on success.
+static bool getRegSeqInit(
+    SmallVectorImpl<std::pair<MachineOperand*, unsigned>> &Defs,
+    Register UseReg, uint8_t OpTy,
+    const SIInstrInfo *TII, const MachineRegisterInfo &MRI) {
+  MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
+  if (!Def || !Def->isRegSequence())
+    return false;
+
+  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
+    MachineOperand *Sub = &Def->getOperand(I);
+    assert(Sub->isReg());
+
+    for (MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub->getReg());
+         SubDef && Sub->isReg() && !Sub->getSubReg() &&
+         TII->isFoldableCopy(*SubDef);
+         SubDef = MRI.getUniqueVRegDef(Sub->getReg())) {
+      MachineOperand *Op = &SubDef->getOperand(1);
+      if (Op->isImm()) {
+        if (TII->isInlineConstant(*Op, OpTy))
+          Sub = Op;
+        break;
+      }
+      if (!Op->isReg())
+        break;
+      Sub = Op;
+    }
+
+    Defs.push_back(std::make_pair(Sub, Def->getOperand(I + 1).getImm()));
+  }
+
+  return true;
+}
+
 static bool tryToFoldACImm(const SIInstrInfo *TII,
                            const MachineOperand &OpToFold,
                            MachineInstr *UseMI,
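(A minimal consumer sketch for getRegSeqInit(), illustrative and not part
of the patch; it assumes a virtual register UseReg defined by a
REG_SEQUENCE of V_MOV_B32 immediates, and uses the same operand type the
patch passes below:

  // Collect one (initializer, subreg index) pair per REG_SEQUENCE source.
  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
  if (getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32, TII,
                    MRI)) {
    for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
      MachineOperand *Op = Defs[I].first; // imm if traced to an inline
                                          // constant, else a register
      if (Op->isImm())
        dbgs() << "sub idx " << Defs[I].second << " <- imm "
               << Op->getImm() << '\n';
    }
  }
)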
@@ -474,39 +511,30 @@ static bool tryToFoldACImm(const SIInstrInfo *TII,
     return false;
   MachineRegisterInfo &MRI = UseMI->getParent()->getParent()->getRegInfo();
-  const MachineInstr *Def = MRI.getUniqueVRegDef(UseReg);
-  if (!Def || !Def->isRegSequence())
+  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+  if (!getRegSeqInit(Defs, UseReg, OpTy, TII, MRI))
     return false;
-  int64_t Imm;
-  MachineOperand *Op;
-  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
-    const MachineOperand &Sub = Def->getOperand(I);
-    if (!Sub.isReg() || Sub.getSubReg())
+  int32_t Imm;
+  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
+    const MachineOperand *Op = Defs[I].first;
+    if (!Op->isImm())
       return false;
-    MachineInstr *SubDef = MRI.getUniqueVRegDef(Sub.getReg());
-    while (SubDef && !SubDef->isMoveImmediate() &&
-           !SubDef->getOperand(1).isImm() && TII->isFoldableCopy(*SubDef))
-      SubDef = MRI.getUniqueVRegDef(SubDef->getOperand(1).getReg());
-    if (!SubDef || !SubDef->isMoveImmediate() || !SubDef->getOperand(1).isImm())
-      return false;
-    Op = &SubDef->getOperand(1);
+
     auto SubImm = Op->getImm();
-    if (I == 1) {
-      if (!TII->isInlineConstant(SubDef->getOperand(1), OpTy))
+    if (!I) {
+      Imm = SubImm;
+      if (!TII->isInlineConstant(*Op, OpTy) ||
+          !TII->isOperandLegal(*UseMI, UseOpIdx, Op))
         return false;
-      Imm = SubImm;
       continue;
     }
     if (Imm != SubImm)
       return false; // Can only fold splat constants
   }
-  if (!TII->isOperandLegal(*UseMI, UseOpIdx, Op))
-    return false;
-
-  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Op);
+  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Defs[0].first);
   return true;
 }
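(Illustrative effect of the splat check above; approximate MIR, not taken
from the patch, and the MFMA opcode name is abbreviated. Before the fold,
the use reads a 64-bit tuple whose lanes were all initialized to the same
inline constant:

  %0:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec  ; 1.0f
  %1:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1
  ... = V_MFMA_... %a, %b, %1, ...

After the candidate from appendFoldCandidate() is applied, the whole
operand becomes the single immediate, which is only legal because every
32-bit lane held the same inline constant:

  ... = V_MFMA_... %a, %b, 1065353216, ...
)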
@@ -645,11 +673,92 @@ void SIFoldOperands::foldOperand(
     LLVM_DEBUG(dbgs() << "Folding " << OpToFold
                       << "\n into " << *UseMI << '\n');
     unsigned Size = TII->getOpSize(*UseMI, 1);
-    UseMI->getOperand(1).setReg(OpToFold.getReg());
+    Register UseReg = OpToFold.getReg();
+    UseMI->getOperand(1).setReg(UseReg);
     UseMI->getOperand(1).setSubReg(OpToFold.getSubReg());
     UseMI->getOperand(1).setIsKill(false);
     CopiesToReplace.push_back(UseMI);
     OpToFold.setIsKill(false);
+
+    // It is very tricky to store a value into an AGPR: v_accvgpr_write_b32
+    // can only accept a VGPR or an inline immediate. Recreate the
+    // reg_sequence with its initializers right here, so that we
+    // rematerialize immediates and avoid copies via different reg classes.
+    SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
+    if (Size > 4 && TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
+        getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32, TII,
+                      *MRI)) {
+      const DebugLoc &DL = UseMI->getDebugLoc();
+      MachineBasicBlock &MBB = *UseMI->getParent();
+
+      UseMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));
+      for (unsigned I = UseMI->getNumOperands() - 1; I > 0; --I)
+        UseMI->RemoveOperand(I);
+
+      MachineInstrBuilder B(*MBB.getParent(), UseMI);
+      DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
+      SmallSetVector<TargetInstrInfo::RegSubRegPair, 32> SeenAGPRs;
+      for (unsigned I = 0; I < Size / 4; ++I) {
+        MachineOperand *Def = Defs[I].first;
+        TargetInstrInfo::RegSubRegPair CopyToVGPR;
+        if (Def->isImm() &&
+            TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+          int64_t Imm = Def->getImm();
+
+          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+          BuildMI(MBB, UseMI, DL,
+                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), Tmp).addImm(Imm);
+          B.addReg(Tmp);
+        } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
+          auto Src = getRegSubRegPair(*Def);
+          Def->setIsKill(false);
+          if (!SeenAGPRs.insert(Src)) {
+            // We cannot build a reg_sequence out of the same registers; they
+            // must be copied. Better to do it here, before copyPhysReg()
+            // creates several reads to do the AGPR->VGPR->AGPR copy.
+            CopyToVGPR = Src;
+          } else {
+            B.addReg(Src.Reg, Def->isUndef() ? RegState::Undef : 0,
+                     Src.SubReg);
+          }
+        } else {
+          assert(Def->isReg());
+          Def->setIsKill(false);
+          auto Src = getRegSubRegPair(*Def);
+
+          // A direct copy from an SGPR to an AGPR is not possible. To avoid
+          // creating exploded SGPR->VGPR->AGPR copies in copyPhysReg()
+          // later, create the copy here and track whether we already have
+          // such a copy.
+          if (TRI->isSGPRReg(*MRI, Src.Reg)) {
+            CopyToVGPR = Src;
+          } else {
+            auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+            BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Tmp).add(*Def);
+            B.addReg(Tmp);
+          }
+        }
+
+        if (CopyToVGPR.Reg) {
+          Register Vgpr;
+          if (VGPRCopies.count(CopyToVGPR)) {
+            Vgpr = VGPRCopies[CopyToVGPR];
+          } else {
+            Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+            BuildMI(MBB, UseMI, DL, TII->get(AMDGPU::COPY), Vgpr).add(*Def);
+            VGPRCopies[CopyToVGPR] = Vgpr;
+          }
+          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
+          BuildMI(MBB, UseMI, DL,
+                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), Tmp).addReg(Vgpr);
+          B.addReg(Tmp);
+        }
+
+        B.addImm(Defs[I].second);
+      }
+      LLVM_DEBUG(dbgs() << "Folded " << *UseMI << '\n');
+      return;
+    }
+
     if (Size != 4)
       return;
     if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) &&
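(Illustrative before/after for the AGPR rewrite above; approximate MIR,
not taken from the patch. A wide copy into an AGPR tuple whose source is
a reg_sequence of immediates:

  %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  %1:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1
  %2:areg_64 = COPY %1

becomes a REG_SEQUENCE whose elements are rematerialized straight into
AGPRs, instead of letting copyPhysReg() expand the copy through VGPRs:

  %3:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
  %4:agpr_32 = V_ACCVGPR_WRITE_B32 0, implicit $exec
  %2:areg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1

For an SGPR or repeated-AGPR source lane, a single VGPR copy is created
once, cached in VGPRCopies, and fed to V_ACCVGPR_WRITE_B32 for each lane
that needs it.)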