path: root/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
author     Matt Arsenault <Matthew.Arsenault@amd.com>  2019-06-24 14:53:56 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2019-06-24 14:53:56 +0000
commit     60957cb74c8869e2abd8996f616261bf4103305d (patch)
tree       2f5a78b31d959b60a1629736a66828dacd40eb57 /llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
parent     f27f794d473e777340ee96ac5d751a698039caf8 (diff)
AMDGPU: Fold frame index into MUBUF
This matters for byval uses outside of the entry block, which appear as copies.

Previously, the only folding was done during selection, which could not see the underlying frame index. For any use of the frame index outside the entry block, the frame index was materialized in the entry block relative to the global scratch wave offset. This may produce worse code in cases where the offset ends up not fitting in the MUBUF offset field. A better heuristic would be helpful for extreme frames.

llvm-svn: 364185
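For orientation before reading the diff, the check-and-rewrite the patch performs can be condensed into a single helper. This is only a sketch: the name tryFoldFrameIndexIntoMUBUF is hypothetical (the patch itself splits the logic between the new frameIndexMayFold() predicate and SIFoldOperands::foldOperand() below), and the surrounding pass plumbing is omitted.

// Condensed sketch of the fold added by this patch (hypothetical helper name;
// the real change spreads this across frameIndexMayFold and foldOperand).
static bool tryFoldFrameIndexIntoMUBUF(const SIInstrInfo *TII,
                                       const SIMachineFunctionInfo *MFI,
                                       MachineInstr &UseMI, int OpNo,
                                       MachineOperand &OpToFold) {
  // Only a frame index feeding the vaddr operand of a MUBUF (or flat
  // scratch) instruction is a candidate.
  if (!OpToFold.isFI() ||
      !(TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) ||
      OpNo != AMDGPU::getNamedOperandIdx(UseMI.getOpcode(),
                                         AMDGPU::OpName::vaddr))
    return false;

  // Sanity check that this really is a stack access: soffset must be the
  // scratch wave offset or the stack pointer, and srsrc the scratch rsrc.
  MachineOperand *SOff = TII->getNamedOperand(UseMI, AMDGPU::OpName::soffset);
  if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
                         SOff->getReg() != MFI->getStackPtrOffsetReg()))
    return false;
  if (TII->getNamedOperand(UseMI, AMDGPU::OpName::srsrc)->getReg() !=
      MFI->getScratchRSrcReg())
    return false;

  // A frame index resolves to a positive constant, so folding it into the
  // addressing mode is safe; rebase soffset on the stack pointer.
  UseMI.getOperand(OpNo).ChangeToFrameIndex(OpToFold.getIndex());
  SOff->setReg(MFI->getStackPtrOffsetReg());
  return true;
}

In the patch, the predicate half of this (frameIndexMayFold) also gates the fold in foldInstOperand, so frame-index uses that are not inline constants are still routed into foldOperand instead of being treated as literal uses.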
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFoldOperands.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  54
1 file changed, 44 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3f566884f6b..78e6e39b05a 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -87,10 +87,11 @@ public:
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
   const GCNSubtarget *ST;
+  const SIMachineFunctionInfo *MFI;
 
   void foldOperand(MachineOperand &OpToFold,
                    MachineInstr *UseMI,
-                   unsigned UseOpIdx,
+                   int UseOpIdx,
                    SmallVectorImpl<FoldCandidate> &FoldList,
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
@@ -159,6 +160,17 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
   }
 }
 
+// TODO: Add heuristic that the frame index might not fit in the addressing mode
+// immediate offset to avoid materializing in loops.
+static bool frameIndexMayFold(const SIInstrInfo *TII,
+                              const MachineInstr &UseMI,
+                              int OpNo,
+                              const MachineOperand &OpToFold) {
+  return OpToFold.isFI() &&
+    (TII->isMUBUF(UseMI) || TII->isFLATScratch(UseMI)) &&
+    OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::vaddr);
+}
+
 FunctionPass *llvm::createSIFoldOperandsPass() {
   return new SIFoldOperands();
 }
@@ -290,7 +302,6 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                              MachineOperand *OpToFold,
                              const SIInstrInfo *TII) {
   if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
-
     // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
     unsigned Opc = MI->getOpcode();
     if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
@@ -403,7 +414,7 @@ static bool isUseSafeToFold(const SIInstrInfo *TII,
 void SIFoldOperands::foldOperand(
   MachineOperand &OpToFold,
   MachineInstr *UseMI,
-  unsigned UseOpIdx,
+  int UseOpIdx,
   SmallVectorImpl<FoldCandidate> &FoldList,
   SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
@@ -453,10 +464,28 @@ void SIFoldOperands::foldOperand(
     return;
   }
 
+  if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
+    // Sanity check that this is a stack access.
+    // FIXME: Should probably use stack pseudos before frame lowering.
+    MachineOperand *SOff = TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
+    if (!SOff->isReg() || (SOff->getReg() != MFI->getScratchWaveOffsetReg() &&
+                           SOff->getReg() != MFI->getStackPtrOffsetReg()))
+      return;
+
+    if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
+        MFI->getScratchRSrcReg())
+      return;
 
-  bool FoldingImm = OpToFold.isImm();
+    // A frame index will resolve to a positive constant, so it should always be
+    // safe to fold the addressing mode, even pre-GFX9.
+    UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
+    SOff->setReg(MFI->getStackPtrOffsetReg());
+    return;
+  }
 
-  if (FoldingImm && UseMI->isCopy()) {
+  bool FoldingImmLike = OpToFold.isImm() || OpToFold.isFI();
+
+  if (FoldingImmLike && UseMI->isCopy()) {
     unsigned DestReg = UseMI->getOperand(0).getReg();
     const TargetRegisterClass *DestRC
       = TargetRegisterInfo::isVirtualRegister(DestReg) ?
@@ -517,7 +546,7 @@ void SIFoldOperands::foldOperand(
     // %sgpr = V_READFIRSTLANE_B32 %vgpr
     // =>
     // %sgpr = S_MOV_B32 imm
-    if (FoldingImm) {
+    if (FoldingImmLike) {
       if (execMayBeModifiedBeforeUse(*MRI,
                                      UseMI->getOperand(UseOpIdx).getReg(),
                                      *OpToFold.getParent(),
@@ -528,7 +557,10 @@ void SIFoldOperands::foldOperand(
       // FIXME: ChangeToImmediate should clear subreg
       UseMI->getOperand(1).setSubReg(0);
-      UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+      if (OpToFold.isImm())
+        UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
+      else
+        UseMI->getOperand(1).ChangeToFrameIndex(OpToFold.getIndex());
       UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
       return;
     }
@@ -560,7 +592,7 @@ void SIFoldOperands::foldOperand(
     return;
   }
 
-  if (!FoldingImm) {
+  if (!FoldingImmLike) {
     tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
 
     // FIXME: We could try to change the instruction from 64-bit to 32-bit
@@ -904,6 +936,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
       // in some cases. A better heuristic is needed.
       if (isInlineConstantIfFolded(TII, *UseMI, OpNo, OpToFold)) {
         foldOperand(OpToFold, UseMI, OpNo, FoldList, CopiesToReplace);
+      } else if (frameIndexMayFold(TII, *UseMI, OpNo, OpToFold)) {
+        foldOperand(OpToFold, UseMI, OpNo, FoldList,
+                    CopiesToReplace);
       } else {
         if (++NumLiteralUses == 1) {
           NonInlineUse = &*Use;
@@ -1170,8 +1205,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
-
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MFI = MF.getInfo<SIMachineFunctionInfo>();
 
   // omod is ignored by hardware if IEEE bit is enabled. omod also does not
   // correctly handle signed zeros.