Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.h                     |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp  |  29
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h    |  57
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp      |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/BUFInstructions.td           |   8
-rw-r--r--  llvm/lib/Target/AMDGPU/CMakeLists.txt               |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/FLATInstructions.td          |   4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp        | 491
8 files changed, 591 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 568682899be..bbb542eb4fc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -45,6 +45,7 @@ FunctionPass *createSILoadStoreOptimizerPass();
 FunctionPass *createSIWholeQuadModePass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIFixSGPRCopiesPass();
+FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIDebuggerInsertNopsPass();
 FunctionPass *createSIInsertWaitsPass();
 FunctionPass *createSIInsertWaitcntsPass();
@@ -125,6 +126,9 @@ extern char &AMDGPUCodeGenPrepareID;
 void initializeSIAnnotateControlFlowPass(PassRegistry&);
 extern char &SIAnnotateControlFlowPassID;
 
+void initializeSIMemoryLegalizerPass(PassRegistry&);
+extern char &SIMemoryLegalizerID;
+
 void initializeSIDebuggerInsertNopsPass(PassRegistry&);
 extern char &SIDebuggerInsertNopsID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
new file mode 100644
index 00000000000..3164140abe2
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -0,0 +1,29 @@
+//===--- AMDGPUMachineModuleInfo.cpp ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Machine Module Info.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+
+AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
+    : MachineModuleInfoELF(MMI) {
+  LLVMContext &CTX = MMI.getModule()->getContext();
+  AgentSSID = CTX.getOrInsertSyncScopeID("agent");
+  WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
+  WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
+}
+
+} // end namespace llvm
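The constructor above registers the target's named synchronization scopes in the LLVMContext once and caches their IDs, so later passes can compare a MachineMemOperand's scope against them without repeated string lookups. A minimal sketch of how a machine pass would read that cache (the getObjFileInfo call is the same one SIMemoryLegalizer uses below; the isAgentScope wrapper is hypothetical):

#include "AMDGPUMachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

// Hypothetical helper: check whether an atomic's scope is "agent".
// getObjFileInfo lazily constructs AMDGPUMachineModuleInfo on first use,
// running the constructor shown above.
static bool isAgentScope(const llvm::MachineFunction &MF,
                         llvm::SyncScope::ID SSID) {
  auto &MMI = MF.getMMI().getObjFileInfo<llvm::AMDGPUMachineModuleInfo>();
  return SSID == MMI.getAgentSSID();
}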
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
new file mode 100644
index 00000000000..98fe75b7a13
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -0,0 +1,57 @@
+//===--- AMDGPUMachineModuleInfo.h ------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Machine Module Info.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEMODULEINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+
+class AMDGPUMachineModuleInfo final : public MachineModuleInfoELF {
+private:
+
+  // All supported memory/synchronization scopes can be found here:
+  // http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
+
+  /// \brief Agent synchronization scope ID.
+  SyncScope::ID AgentSSID;
+  /// \brief Workgroup synchronization scope ID.
+  SyncScope::ID WorkgroupSSID;
+  /// \brief Wavefront synchronization scope ID.
+  SyncScope::ID WavefrontSSID;
+
+public:
+  AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
+
+  /// \returns Agent synchronization scope ID.
+  SyncScope::ID getAgentSSID() const {
+    return AgentSSID;
+  }
+  /// \returns Workgroup synchronization scope ID.
+  SyncScope::ID getWorkgroupSSID() const {
+    return WorkgroupSSID;
+  }
+  /// \returns Wavefront synchronization scope ID.
+  SyncScope::ID getWavefrontSSID() const {
+    return WavefrontSSID;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEMODULEINFO_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dc868f010d8..1f8b6a88faa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -150,6 +150,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeSIWholeQuadModePass(*PR);
   initializeSILowerControlFlowPass(*PR);
   initializeSIInsertSkipsPass(*PR);
+  initializeSIMemoryLegalizerPass(*PR);
   initializeSIDebuggerInsertNopsPass(*PR);
   initializeSIOptimizeExecMaskingPass(*PR);
   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
@@ -806,6 +807,7 @@ void GCNPassConfig::addPreEmitPass() {
   addPass(createSIInsertWaitsPass());
   addPass(createSIShrinkInstructionsPass());
   addPass(&SIInsertSkipsPassID);
+  addPass(createSIMemoryLegalizerPass());
   addPass(createSIDebuggerInsertNopsPass());
   addPass(&BranchRelaxationPassID);
 }
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 89caa65cdaf..cbf9d1fa700 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1058,12 +1058,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
   def : Pat <
     (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                 i16:$offset, i1:$slc))),
-    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
   >;
 
   def : Pat <
     (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
   >;
 }
 
@@ -1174,12 +1174,12 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
   def : Pat <
     (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                             i16:$offset, i1:$slc), vt:$val),
-    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
   >;
 
   def : Pat <
     (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
   >;
 }
 let Predicates = [isSICI] in {
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 971208c5db8..164a57d848d 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -51,6 +51,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUMCInstLower.cpp
   AMDGPUMachineCFGStructurizer.cpp
   AMDGPUMachineFunction.cpp
+  AMDGPUMachineModuleInfo.cpp
   AMDGPUUnifyMetadata.cpp
   AMDGPUOpenCLImageTypeLoweringPass.cpp
   AMDGPUSubtarget.cpp
@@ -93,6 +94,7 @@ add_llvm_target(AMDGPUCodeGen
   SILowerI1Copies.cpp
   SIMachineFunctionInfo.cpp
   SIMachineScheduler.cpp
+  SIMemoryLegalizer.cpp
   SIOptimizeExecMasking.cpp
   SIPeepholeSDWA.cpp
   SIRegisterInfo.cpp
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index b038cc4fa2c..8913b620c4f 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -620,7 +620,7 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat
 
 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))),
-  (inst $vaddr, $offset, 1, $slc)
+  (inst $vaddr, $offset, 0, $slc)
 >;
 
 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
@@ -632,7 +632,7 @@ class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
-  (inst $vaddr, $data, $offset, 1, $slc)
+  (inst $vaddr, $data, $offset, 0, $slc)
 >;
 
 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
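With this patch, the MUBUF and FLAT selection patterns above stop hard-coding glc = 1 for atomics; they now select with glc = 0, and the legalizer re-enables the bit only where the memory model requires it (for example, acquire and seq_cst loads at agent or system scope). The rewrite itself is a named-operand update; a standalone sketch of the mechanism, mirroring the setGLC helper defined later in this patch:

#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

// Sketch: set the glc immediate on an instruction that has one.
// Returns false when the instruction carries no glc operand (e.g. DS ops)
// or when the bit is already set.
static bool enableGLC(llvm::MachineInstr &MI) {
  int Idx = llvm::AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             llvm::AMDGPU::OpName::glc);
  if (Idx == -1)
    return false;
  llvm::MachineOperand &GLC = MI.getOperand(Idx);
  if (GLC.getImm() == 1)
    return false;
  GLC.setImm(1);
  return true;
}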
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
new file mode 100644
index 00000000000..48812c84b20
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -0,0 +1,491 @@
+//===--- SIMemoryLegalizer.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Memory legalizer - implements memory model. More information can be
+/// found here:
+/// http://llvm.org/docs/AMDGPUUsage.html#memory-model
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/DiagnosticInfo.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+#define DEBUG_TYPE "si-memory-legalizer"
+#define PASS_NAME "SI Memory Legalizer"
+
+namespace {
+
+class SIMemoryLegalizer final : public MachineFunctionPass {
+private:
+  struct AtomicInfo final {
+    SyncScope::ID SSID = SyncScope::System;
+    AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
+    AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;
+
+    AtomicInfo() {}
+
+    AtomicInfo(SyncScope::ID SSID,
+               AtomicOrdering Ordering,
+               AtomicOrdering FailureOrdering)
+        : SSID(SSID),
+          Ordering(Ordering),
+          FailureOrdering(FailureOrdering) {}
+
+    AtomicInfo(const MachineMemOperand *MMO)
+        : SSID(MMO->getSyncScopeID()),
+          Ordering(MMO->getOrdering()),
+          FailureOrdering(MMO->getFailureOrdering()) {}
+  };
+
+  /// \brief LLVM context.
+  LLVMContext *CTX = nullptr;
+  /// \brief Machine module info.
+  const AMDGPUMachineModuleInfo *MMI = nullptr;
+  /// \brief Instruction info.
+  const SIInstrInfo *TII = nullptr;
+
+  /// \brief Immediate for "vmcnt(0)".
+  unsigned Vmcnt0Immediate = 0;
+  /// \brief Opcode for cache invalidation instruction (L1).
+  unsigned Wbinvl1Opcode = 0;
+
+  /// \brief List of atomic pseudo instructions.
+  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
+
+  /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
+  /// Always returns true.
+  bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
+                              bool Before = true) const;
+  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
+  /// Always returns true.
+  bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
+                           bool Before = true) const;
+
+  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
+  /// modified, false otherwise.
+  bool setGLC(const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Removes all processed atomic pseudo instructions from the current
+  /// function. Returns true if current function is modified, false otherwise.
+  bool removeAtomicPseudoMIs();
+
+  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
+  /// context.
+  void reportUnknownSynchScope(const MachineBasicBlock::iterator &MI);
+
+  /// \returns Atomic fence info if \p MI is an atomic fence operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicFenceInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic load info if \p MI is an atomic load operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicLoadInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic store info if \p MI is an atomic store operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicStoreInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicCmpxchgInfo(
+      const MachineBasicBlock::iterator &MI) const;
+  /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
+  /// "None" otherwise.
+  Optional<AtomicInfo> getAtomicRmwInfo(
+      const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Expands atomic fence operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicFence(const AtomicInfo &AI,
+                         MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic load operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicLoad(const AtomicInfo &AI,
+                        MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic store operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicStore(const AtomicInfo &AI,
+                         MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicCmpxchg(const AtomicInfo &AI,
+                           MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic rmw operation \p MI. Returns true if
+  /// instructions are added/deleted or \p MI is modified, false otherwise.
+  bool expandAtomicRmw(const AtomicInfo &AI,
+                       MachineBasicBlock::iterator &MI);
+
+public:
+  static char ID;
+
+  SIMemoryLegalizer()
+      : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override {
+    return PASS_NAME;
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end namespace anonymous
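The declarations above split the pass into a classification step (the getAtomic*Info helpers) and an expansion step (the expandAtomic* helpers). Hand-deriving one row of the resulting behavior from expandAtomicLoad below: a seq_cst load at system or agent scope gets the glc bit, an s_waitcnt vmcnt(0) before it, and an s_waitcnt vmcnt(0) plus an L1 invalidation after it. A toy model of those decisions, distilled from the code below for illustration (not part of the patch):

#include "llvm/Support/AtomicOrdering.h"

// Actions expandAtomicLoad applies at system/agent scope.
struct LoadActions {
  bool SetGLC = false;       // bypass L1 so the load observes coherent data
  bool WaitBefore = false;   // s_waitcnt vmcnt(0) before the load
  bool WaitAfter = false;    // s_waitcnt vmcnt(0) after the load
  bool InvalidateL1 = false; // buffer_wbinvl1[_vol] after the load
};

static LoadActions loadActionsAtAgentScope(llvm::AtomicOrdering O) {
  using llvm::AtomicOrdering;
  LoadActions A;
  if (O == AtomicOrdering::Acquire ||
      O == AtomicOrdering::SequentiallyConsistent) {
    A.SetGLC = true;
    A.WaitAfter = true;
    A.InvalidateL1 = true;
  }
  if (O == AtomicOrdering::SequentiallyConsistent)
    A.WaitBefore = true;
  return A;
}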
+
+bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
+                                               bool Before) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (!Before)
+    ++MI;
+
+  BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));
+
+  if (!Before)
+    --MI;
+
+  return true;
+}
+
+bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
+                                            bool Before) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (!Before)
+    ++MI;
+
+  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
+
+  if (!Before)
+    --MI;
+
+  return true;
+}
+
+bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
+  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
+  if (GLCIdx == -1)
+    return false;
+
+  MachineOperand &GLC = MI->getOperand(GLCIdx);
+  if (GLC.getImm() == 1)
+    return false;
+
+  GLC.setImm(1);
+  return true;
+}
+
+bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
+  if (AtomicPseudoMIs.empty())
+    return false;
+
+  for (auto &MI : AtomicPseudoMIs)
+    MI->eraseFromParent();
+
+  AtomicPseudoMIs.clear();
+  return true;
+}
+
+void SIMemoryLegalizer::reportUnknownSynchScope(
+    const MachineBasicBlock::iterator &MI) {
+  DiagnosticInfoUnsupported Diag(*MI->getParent()->getParent()->getFunction(),
+                                 "Unsupported synchronization scope");
+  CTX->diagnose(Diag);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicFenceInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
+    return None;
+
+  SyncScope::ID SSID =
+      static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
+  return AtomicInfo(SSID, Ordering, AtomicOrdering::NotAtomic);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicLoadInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(MI->mayLoad() && !MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicStoreInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(!MI->mayLoad() && MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicCmpxchgInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(MI->mayLoad() && MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+  if (MMO->getFailureOrdering() == AtomicOrdering::NotAtomic)
+    return None;
+
+  return AtomicInfo(MMO);
+}
+
+Optional<SIMemoryLegalizer::AtomicInfo> SIMemoryLegalizer::getAtomicRmwInfo(
+    const MachineBasicBlock::iterator &MI) const {
+  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
+
+  if (!(MI->mayLoad() && MI->mayStore()))
+    return None;
+  if (!MI->hasOneMemOperand())
+    return AtomicInfo();
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  if (!MMO->isAtomic())
+    return None;
+  if (MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+    return None;
+
+  return AtomicInfo(MMO);
+}
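Note how the classifiers above resolve the overlapping cases: loads are mayLoad-only, stores are mayStore-only, and both cmpxchg and plain RMW set mayLoad and mayStore, so the tiebreaker is the failure ordering, which only cmpxchg carries. Restated as a standalone predicate (a sketch assuming a single memory operand, as the code above does):

#include "llvm/CodeGen/MachineMemOperand.h"

// A cmpxchg is the only atomic whose memory operand records a failure
// ordering; plain RMW operations leave it as NotAtomic.
static bool isCmpxchgMMO(const llvm::MachineMemOperand &MMO) {
  return MMO.isAtomic() &&
         MMO.getFailureOrdering() != llvm::AtomicOrdering::NotAtomic;
}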
+
+bool SIMemoryLegalizer::expandAtomicFence(const AtomicInfo &AI,
+                                          MachineBasicBlock::iterator &MI) {
+  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertBufferWbinvl1Vol(MI);
+
+    AtomicPseudoMIs.push_back(MI);
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    AtomicPseudoMIs.push_back(MI);
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicLoad(const AtomicInfo &AI,
+                                         MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoad() && !MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= setGLC(MI);
+
+    if (AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
+      Changed |= insertWaitcntVmcnt0(MI, false);
+      Changed |= insertBufferWbinvl1Vol(MI, false);
+    }
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicStore(const AtomicInfo &AI,
+                                          MachineBasicBlock::iterator &MI) {
+  assert(!MI->mayLoad() && MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicCmpxchg(const AtomicInfo &AI,
+                                            MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoad() && MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
+        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent ||
+        AI.FailureOrdering == AtomicOrdering::Acquire ||
+        AI.FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
+      Changed |= insertWaitcntVmcnt0(MI, false);
+      Changed |= insertBufferWbinvl1Vol(MI, false);
+    }
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    Changed |= setGLC(MI);
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::expandAtomicRmw(const AtomicInfo &AI,
+                                        MachineBasicBlock::iterator &MI) {
+  assert(MI->mayLoad() && MI->mayStore());
+
+  bool Changed = false;
+  if (AI.SSID == SyncScope::System ||
+      AI.SSID == MMI->getAgentSSID()) {
+    if (AI.Ordering == AtomicOrdering::Release ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= insertWaitcntVmcnt0(MI);
+
+    if (AI.Ordering == AtomicOrdering::Acquire ||
+        AI.Ordering == AtomicOrdering::AcquireRelease ||
+        AI.Ordering == AtomicOrdering::SequentiallyConsistent) {
+      Changed |= insertWaitcntVmcnt0(MI, false);
+      Changed |= insertBufferWbinvl1Vol(MI, false);
+    }
+
+    return Changed;
+  } else if (AI.SSID == SyncScope::SingleThread ||
+             AI.SSID == MMI->getWorkgroupSSID() ||
+             AI.SSID == MMI->getWavefrontSSID()) {
+    Changed |= setGLC(MI);
+    return Changed;
+  } else {
+    reportUnknownSynchScope(MI);
+    return Changed;
+  }
+}
+
+bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
+
+  CTX = &MF.getFunction()->getContext();
+  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
+  TII = ST.getInstrInfo();
+
+  Vmcnt0Immediate =
+      AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
+  Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
+      AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
+
+  for (auto &MBB : MF) {
+    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
+        continue;
+
+      if (const auto &AI = getAtomicFenceInfo(MI))
+        Changed |= expandAtomicFence(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicLoadInfo(MI))
+        Changed |= expandAtomicLoad(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicStoreInfo(MI))
+        Changed |= expandAtomicStore(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicCmpxchgInfo(MI))
+        Changed |= expandAtomicCmpxchg(AI.getValue(), MI);
+      else if (const auto &AI = getAtomicRmwInfo(MI))
+        Changed |= expandAtomicRmw(AI.getValue(), MI);
+    }
+  }
+
+  Changed |= removeAtomicPseudoMIs();
+  return Changed;
+}
+
+INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
+
+char SIMemoryLegalizer::ID = 0;
+char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
+
+FunctionPass *llvm::createSIMemoryLegalizerPass() {
+  return new SIMemoryLegalizer();
+}
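For reference, the IR that exercises this pass carries the scopes registered in AMDGPUMachineModuleInfo, e.g. load atomic i32, i32 addrspace(1)* %p syncscope("agent") acquire, align 4. A sketch of producing such an instruction from a frontend with IRBuilder (the function and its name are illustrative, not part of the patch, and exact IRBuilder/setAlignment signatures vary slightly across LLVM versions):

#include "llvm/IR/IRBuilder.h"

// Build an acquire atomic load at AMDGPU's "agent" scope. The scope
// string must match what AMDGPUMachineModuleInfo registers above.
static llvm::LoadInst *emitAgentAcquireLoad(llvm::IRBuilder<> &B,
                                            llvm::Type *Ty,
                                            llvm::Value *Ptr) {
  llvm::SyncScope::ID Agent =
      B.getContext().getOrInsertSyncScopeID("agent");
  llvm::LoadInst *LI = B.CreateLoad(Ty, Ptr);
  // The verifier requires atomic loads to carry explicit alignment
  // (older LLVM uses an unsigned overload instead of llvm::Align).
  LI->setAlignment(llvm::Align(4));
  LI->setAtomic(llvm::AtomicOrdering::Acquire, Agent);
  return LI;
}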