diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-09-19 20:54:38 +0000 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2017-09-19 20:54:38 +0000 |
| commit | d4ae470d2e7a967331a5bb3ae5be29a4f0b6c764 (patch) | |
| tree | 9065b407288d50bfeb20d5827ea6fb2dcbf008ca /llvm/lib/Target | |
| parent | 59a01a958a55c75af4076e35ba67e2a4a38d8dce (diff) | |
| download | bcm5719-llvm-d4ae470d2e7a967331a5bb3ae5be29a4f0b6c764.tar.gz bcm5719-llvm-d4ae470d2e7a967331a5bb3ae5be29a4f0b6c764.zip | |
[AMDGPU] Prevent post-RA scheduler from breaking memory clauses
The pre-RA scheduler does load/store clustering, but the post-RA
scheduler undoes it. Add a mutation to prevent this.
Differential Revision: https://reviews.llvm.org/D38014
llvm-svn: 313670
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 54 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 4 |
2 files changed, 58 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 18fdaf441e0..59f9baf9af0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -524,3 +524,57 @@ unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { return MaxNumVGPRs - getReservedNumVGPRs(MF); } + +struct MemOpClusterMutation : ScheduleDAGMutation { + const SIInstrInfo *TII; + + MemOpClusterMutation(const SIInstrInfo *tii) : TII(tii) {} + + void apply(ScheduleDAGInstrs *DAGInstrs) override { + ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); + + SUnit *SUa = nullptr; + // Search for two consecutive memory operations of the same kind and link + // them to prevent the scheduler from moving them apart. + // During DAG pre-processing, SUnits are in the original order of + // the instructions before scheduling. + for (SUnit &SU : DAG->SUnits) { + MachineInstr &MI2 = *SU.getInstr(); + if (!MI2.mayLoad() && !MI2.mayStore()) { + SUa = nullptr; + continue; + } + if (!SUa) { + SUa = &SU; + continue; + } + + MachineInstr &MI1 = *SUa->getInstr(); + if ((TII->isVMEM(MI1) && TII->isVMEM(MI2)) || + (TII->isFLAT(MI1) && TII->isFLAT(MI2)) || + (TII->isSMRD(MI1) && TII->isSMRD(MI2)) || + (TII->isDS(MI1) && TII->isDS(MI2))) { + SU.addPredBarrier(SUa); + + for (const SDep &SI : SU.Preds) { + if (SI.getSUnit() != SUa) + SUa->addPred(SDep(SI.getSUnit(), SDep::Artificial)); + } + + if (&SU != &DAG->ExitSU) { + for (const SDep &SI : SUa->Succs) { + if (SI.getSUnit() != &SU) + SI.getSUnit()->addPred(SDep(&SU, SDep::Artificial)); + } + } + } + + SUa = &SU; + } + } +}; + +void SISubtarget::getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const { + Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo)); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index c2ae2227830..7e7a09648ed 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -883,6 +883,10 @@ public: /// subtarget's specifications, or does not meet number of waves per execution /// unit requirement. unsigned getMaxNumVGPRs(const MachineFunction &MF) const; + + void getPostRAMutations( + std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) + const override; }; } // end namespace llvm |

