diff options
author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2019-03-25 20:50:21 +0000 |
---|---|---|
committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2019-03-25 20:50:21 +0000 |
commit | 51809cbc98ce187608bd5f2c46d18c5ffb7b6031 (patch) | |
tree | 4e164de16c93595c2d56d94635b14621ab92ec4d /llvm/lib/Target/AMDGPU | |
parent | 65bd5d8aa3b72f9db6e6c0c8f7a66b8bd9e61bb8 (diff) | |
download | bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.tar.gz bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.zip |
AMDGPU: Add support for cross address space synchronization scopes
Differential Revision: https://reviews.llvm.org/D59517
llvm-svn: 356946
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h | 73 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 50 |
3 files changed, 101 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp index 1870c9ca5fa..4d9f08b3af0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp @@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI) AgentSSID = CTX.getOrInsertSyncScopeID("agent"); WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup"); WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront"); + SystemOneAddressSpaceSSID = + CTX.getOrInsertSyncScopeID("one-as"); + AgentOneAddressSpaceSSID = + CTX.getOrInsertSyncScopeID("agent-one-as"); + WorkgroupOneAddressSpaceSSID = + CTX.getOrInsertSyncScopeID("workgroup-one-as"); + WavefrontOneAddressSpaceSSID = + CTX.getOrInsertSyncScopeID("wavefront-one-as"); + SingleThreadOneAddressSpaceSSID = + CTX.getOrInsertSyncScopeID("singlethread-one-as"); } } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h index e6d4268a66e..2b0b8b42acf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h @@ -29,12 +29,22 @@ private: // All supported memory/synchronization scopes can be found here: // http://llvm.org/docs/AMDGPUUsage.html#memory-scopes - /// Agent synchronization scope ID. + /// Agent synchronization scope ID (cross address space). SyncScope::ID AgentSSID; - /// Workgroup synchronization scope ID. + /// Workgroup synchronization scope ID (cross address space). SyncScope::ID WorkgroupSSID; - /// Wavefront synchronization scope ID. + /// Wavefront synchronization scope ID (cross address space). SyncScope::ID WavefrontSSID; + /// System synchronization scope ID (single address space). + SyncScope::ID SystemOneAddressSpaceSSID; + /// Agent synchronization scope ID (single address space). + SyncScope::ID AgentOneAddressSpaceSSID; + /// Workgroup synchronization scope ID (single address space). + SyncScope::ID WorkgroupOneAddressSpaceSSID; + /// Wavefront synchronization scope ID (single address space). + SyncScope::ID WavefrontOneAddressSpaceSSID; + /// Single thread synchronization scope ID (single address space). + SyncScope::ID SingleThreadOneAddressSpaceSSID; /// In AMDGPU target synchronization scopes are inclusive, meaning a /// larger synchronization scope is inclusive of a smaller synchronization @@ -43,35 +53,70 @@ private: /// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not /// supported by the AMDGPU target. Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const { - if (SSID == SyncScope::SingleThread) + if (SSID == SyncScope::SingleThread || + SSID == getSingleThreadOneAddressSpaceSSID()) return 0; - else if (SSID == getWavefrontSSID()) + else if (SSID == getWavefrontSSID() || + SSID == getWavefrontOneAddressSpaceSSID()) return 1; - else if (SSID == getWorkgroupSSID()) + else if (SSID == getWorkgroupSSID() || + SSID == getWorkgroupOneAddressSpaceSSID()) return 2; - else if (SSID == getAgentSSID()) + else if (SSID == getAgentSSID() || + SSID == getAgentOneAddressSpaceSSID()) return 3; - else if (SSID == SyncScope::System) + else if (SSID == SyncScope::System || + SSID == getSystemOneAddressSpaceSSID()) return 4; return None; } + /// \returns True if \p SSID is restricted to single address space, false + /// otherwise + bool isOneAddressSpace(SyncScope::ID SSID) const { + return SSID == getSingleThreadOneAddressSpaceSSID() || + SSID == getWavefrontOneAddressSpaceSSID() || + SSID == getWorkgroupOneAddressSpaceSSID() || + SSID == getAgentOneAddressSpaceSSID() || + SSID == getSystemOneAddressSpaceSSID(); + } + public: AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI); - /// \returns Agent synchronization scope ID. + /// \returns Agent synchronization scope ID (cross address space). SyncScope::ID getAgentSSID() const { return AgentSSID; } - /// \returns Workgroup synchronization scope ID. + /// \returns Workgroup synchronization scope ID (cross address space). SyncScope::ID getWorkgroupSSID() const { return WorkgroupSSID; } - /// \returns Wavefront synchronization scope ID. + /// \returns Wavefront synchronization scope ID (cross address space). SyncScope::ID getWavefrontSSID() const { return WavefrontSSID; } + /// \returns System synchronization scope ID (single address space). + SyncScope::ID getSystemOneAddressSpaceSSID() const { + return SystemOneAddressSpaceSSID; + } + /// \returns Agent synchronization scope ID (single address space). + SyncScope::ID getAgentOneAddressSpaceSSID() const { + return AgentOneAddressSpaceSSID; + } + /// \returns Workgroup synchronization scope ID (single address space). + SyncScope::ID getWorkgroupOneAddressSpaceSSID() const { + return WorkgroupOneAddressSpaceSSID; + } + /// \returns Wavefront synchronization scope ID (single address space). + SyncScope::ID getWavefrontOneAddressSpaceSSID() const { + return WavefrontOneAddressSpaceSSID; + } + /// \returns Single thread synchronization scope ID (single address space). + SyncScope::ID getSingleThreadOneAddressSpaceSSID() const { + return SingleThreadOneAddressSpaceSSID; + } /// In AMDGPU target synchronization scopes are inclusive, meaning a /// larger synchronization scope is inclusive of a smaller synchronization @@ -87,7 +132,11 @@ public: if (!AIO || !BIO) return None; - return AIO.getValue() > BIO.getValue(); + bool IsAOneAddressSpace = isOneAddressSpace(A); + bool IsBOneAddressSpace = isOneAddressSpace(B); + + return AIO.getValue() >= BIO.getValue() && + (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace); } }; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 1080332d6e4..ba7ca691f2a 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI, Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>> SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrScope) const { - /// TODO: For now assume OpenCL memory model which treats each - /// address space as having a separate happens-before relation, and - /// so an instruction only has ordering with respect to the address - /// space it accesses, and if it accesses multiple address spaces it - /// does not require ordering of operations in different address - /// spaces. - if (SSID == SyncScope::System) + if (SSID == SyncScope::System) + return std::make_tuple(SIAtomicScope::SYSTEM, + SIAtomicAddrSpace::ATOMIC, + true); + if (SSID == MMI->getAgentSSID()) + return std::make_tuple(SIAtomicScope::AGENT, + SIAtomicAddrSpace::ATOMIC, + true); + if (SSID == MMI->getWorkgroupSSID()) + return std::make_tuple(SIAtomicScope::WORKGROUP, + SIAtomicAddrSpace::ATOMIC, + true); + if (SSID == MMI->getWavefrontSSID()) + return std::make_tuple(SIAtomicScope::WAVEFRONT, + SIAtomicAddrSpace::ATOMIC, + true); + if (SSID == SyncScope::SingleThread) + return std::make_tuple(SIAtomicScope::SINGLETHREAD, + SIAtomicAddrSpace::ATOMIC, + true); + if (SSID == MMI->getSystemOneAddressSpaceSSID()) return std::make_tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC & InstrScope, false); - if (SSID == MMI->getAgentSSID()) + if (SSID == MMI->getAgentOneAddressSpaceSSID()) return std::make_tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC & InstrScope, false); - if (SSID == MMI->getWorkgroupSSID()) + if (SSID == MMI->getWorkgroupOneAddressSpaceSSID()) return std::make_tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC & InstrScope, false); - if (SSID == MMI->getWavefrontSSID()) + if (SSID == MMI->getWavefrontOneAddressSpaceSSID()) return std::make_tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC & InstrScope, false); - if (SSID == SyncScope::SingleThread) + if (SSID == MMI->getSingleThreadOneAddressSpaceSSID()) return std::make_tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC & InstrScope, false); - /// TODO: To support HSA Memory Model need to add additional memory - /// scopes that specify that do require cross address space - /// ordering. return None; } @@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, bool VMCnt = false; bool LGKMCnt = false; - bool EXPCnt = false; if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: - VMCnt = true; + VMCnt |= true; break; case SIAtomicScope::WORKGROUP: case SIAtomicScope::WAVEFRONT: @@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, // also synchronizing with global/GDS memory as LDS operations // could be reordered with respect to later global/GDS memory // operations of the same wave. - LGKMCnt = IsCrossAddrSpaceOrdering; + LGKMCnt |= IsCrossAddrSpaceOrdering; break; case SIAtomicScope::WAVEFRONT: case SIAtomicScope::SINGLETHREAD: @@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, // also synchronizing with global/LDS memory as GDS operations // could be reordered with respect to later global/LDS memory // operations of the same wave. - EXPCnt = IsCrossAddrSpaceOrdering; + LGKMCnt |= IsCrossAddrSpaceOrdering; break; case SIAtomicScope::WORKGROUP: case SIAtomicScope::WAVEFRONT: @@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, } } - if (VMCnt || LGKMCnt || EXPCnt) { + if (VMCnt || LGKMCnt) { unsigned WaitCntImmediate = AMDGPU::encodeWaitcnt(IV, VMCnt ? 0 : getVmcntBitMask(IV), - EXPCnt ? 0 : getExpcntBitMask(IV), + getExpcntBitMask(IV), LGKMCnt ? 0 : getLgkmcntBitMask(IV)); BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate); Changed = true; |