Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp')
-rw-r--r--   llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp   491
1 file changed, 491 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
new file mode 100644
index 00000000000..48812c84b20
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -0,0 +1,491 @@
+//===--- SIMemoryLegalizer.cpp ----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Memory legalizer - implements memory model. More information can be
+/// found here:
+///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/DiagnosticInfo.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+#define DEBUG_TYPE "si-memory-legalizer"
+#define PASS_NAME "SI Memory Legalizer"
+
+namespace {
+
+class SIMemoryLegalizer final : public MachineFunctionPass {
+private:
+  struct AtomicInfo final {
+    SyncScope::ID SSID = SyncScope::System;
+    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
+    AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;
+
+    AtomicInfo() {}
+
+    AtomicInfo(SyncScope::ID SSID,
+               AtomicOrdering Ordering,
+               AtomicOrdering FailureOrdering)
+        : SSID(SSID),
+          Ordering(Ordering),
+          FailureOrdering(FailureOrdering) {}
+
+    AtomicInfo(const MachineMemOperand *MMO)
+        : SSID(MMO->getSyncScopeID()),
+          Ordering(MMO->getOrdering()),
+          FailureOrdering(MMO->getFailureOrdering()) {}
+  };
+
+  /// \brief LLVM context.
+  LLVMContext *CTX = nullptr;
+  /// \brief Machine module info.
+  const AMDGPUMachineModuleInfo *MMI = nullptr;
+  /// \brief Instruction info.
+  const SIInstrInfo *TII = nullptr;
+
+  /// \brief Immediate for "vmcnt(0)".
+  unsigned Vmcnt0Immediate = 0;
+  /// \brief Opcode for cache invalidation instruction (L1).
+  unsigned Wbinvl1Opcode = 0;
+
+  /// \brief List of atomic pseudo instructions.
+  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
+
+  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
+  /// Always returns true.
+  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
+                              bool Before = true) const;
+  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
+  /// Always returns true.
+  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
+                           bool Before = true) const;
+
+  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
+  /// modified, false otherwise.
+  bool setGLC(const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Removes all processed atomic pseudo instructions from the current
+  /// function. Returns true if current function is modified, false otherwise.
+  bool removeAtomicPseudoMIs();
+
+  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
+  /// context.
+  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);
+
+  /// \returns Atomic fence info if \p MI is an atomic fence operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicFenceInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic load info if \p MI is an atomic load operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicLoadInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic store info if \p MI is an atomic store operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicStoreInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicCmpxchgInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicRmwInfo(
+      const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Expands atomic fence operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicFence(const AtomicInfo &AI,
+                         MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic load operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicLoad(const AtomicInfo &AI,
+                        MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic store operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicStore(const AtomicInfo &AI,
+                         MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicCmpxchg(const AtomicInfo &AI,
+                           MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic rmw operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicRmw(const AtomicInfo &AI,
+                       MachineBasicBlock::iterator &MI);
+
+public:
+  static char ID;
+
+  SIMemoryLegalizer()
+      : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override {
+    return PASS_NAME;
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end namespace anonymous
+
+bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
+                                               bool Before) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (!Before)
+    ++MI;
+
+  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));
+
+  if (!Before)
+    --MI;
+
+  return true;
+}
+
+bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
+                                            bool Before) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (!Before)
+    ++MI;
+
+  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
+
+  if (!Before)
+    --MI;
+
+  return true;
+}
+
+bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
+  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
+  if (GLCIdx == -1)
+    return false;
+
+  MachineOperand &GLC = MI->getOperand(GLCIdx);
+  if (GLC.getImm() == 1)
+    return false;
+
+  GLC.setImm(1);
+  return true;
+}
+
+bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
+  if (AtomicPseudoMIs.empty())
+    return false;
+
+  for (auto &MI : AtomicPseudoMIs)
+    MI->eraseFromParent();
+
+  AtomicPseudoMIs.clear();
+  return true;
+}
+
+void SIMemoryLegalizer::reportUnknownSynchScope(
+    const MachineBasicBlock::iterator &MI) {
+  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
+                                 "Unsupported synchronization scope");
+  CTX->diagnose(Diag);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
+    return None;
+
+  SyncScope::ID SSID =
+      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
+  return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(MI->mayLoad() && !MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(!MI->mayLoad() && MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(MI->mayLoad() && MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(MI->mayLoad() && MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
+                                          MachineBasicBlock::iterator &MI) {
+  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertBufferWbinvl1Vol(MI);
+
+    AtomicPseudoMIs.push_back(MI);
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    AtomicPseudoMIs.push_back(MI);
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
+                                         MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoad() && !MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= setGLC(MI);
+
+    if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
+      Changed |= insertWaitcntVmcnt0(MI, false);
+      Changed |= insertBufferWbinvl1Vol(MI, false);
+    }
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
+                                          MachineBasicBlock::iterator &MI) {
+  assert(!MI->mayLoad() && MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
+                                            MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoad() && MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
+        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
+        AI.FailureOrdering == AtomicOrdering::Acquire ||
+        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
+      Changed |= insertWaitcntVmcnt0(MI, false);
+      Changed |= insertBufferWbinvl1Vol(MI, false);
+    }
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    Changed |= setGLC(MI);
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
+                                        MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoad() && MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
+      Changed |= insertWaitcntVmcnt0(MI, false);
+      Changed |= insertBufferWbinvl1Vol(MI, false);
+    }
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    Changed |= setGLC(MI);
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
+
+  CTX = &MF.getFunction()->getContext();
+  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
+  TII = ST.getInstrInfo();
+
+  Vmcnt0Immediate =
+      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
+  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
+      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
+
+  for (auto &MBB : MF) {
+    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
+        continue;
+
+      if (const auto &AI = getAtomicFenceInfo(MI))
+        Changed |= expandAtomicFence(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicLoadInfo(MI))
+        Changed |= expandAtomicLoad(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicStoreInfo(MI))
+        Changed |= expandAtomicStore(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicCmpxchgInfo(MI))
+        Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicRmwInfo(MI))
+        Changed |= expandAtomicRmw(AI.getValue(), MI);
+    }
+  }
+
+  Changed |= removeAtomicPseudoMIs();
+  return Changed;
+}
+
+INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
+
+char SIMemoryLegalizer::ID = 0;
+char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
+
+FunctionPass *llvm::createSIMemoryLegalizerPass() {
+  return new SIMemoryLegalizer();
+}
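
To make the transformation concrete, the following sketch traces expandAtomicLoad above for a sequentially consistent atomic load at agent scope. The load opcode and registers are hypothetical; only the inserted instructions and the GLC bit come from this pass:

    // Before legalization (agent-scope seq_cst atomic load, GLC clear):
    //   flat_load_dword v0, v[1:2]
    //
    // After SIMemoryLegalizer:
    //   s_waitcnt vmcnt(0)              // order against prior vector memory ops
    //   flat_load_dword v0, v[1:2] glc  // GLC set by setGLC(): bypass stale L1 data
    //   s_waitcnt vmcnt(0)              // wait for the load itself to complete
    //   buffer_wbinvl1_vol              // invalidate L1 (acquire side)

This diff is limited to SIMemoryLegalizer.cpp, so the point at which the target schedules the pass is not shown. A minimal sketch of the expected wiring, assuming the usual AMDGPU pass-config hook (the hook name is an assumption, not part of this patch):

    // Hypothetical registration in AMDGPUTargetMachine.cpp (not in this diff):
    void GCNPassConfig::addPreEmitPass() {
      // Run after instruction selection, while every atomic machine instruction
      // still carries the MachineMemOperand this pass inspects for its
      // synchronization scope and ordering.
      addPass(createSIMemoryLegalizerPass());
    }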