summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2019-03-25 20:50:21 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2019-03-25 20:50:21 +0000
commit: 51809cbc98ce187608bd5f2c46d18c5ffb7b6031 (patch)
tree: 4e164de16c93595c2d56d94635b14621ab92ec4d /llvm/lib/Target/AMDGPU
parent: 65bd5d8aa3b72f9db6e6c0c8f7a66b8bd9e61bb8 (diff)
download: bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.tar.gz
          bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.zip
AMDGPU: Add support for cross address space synchronization scopes

Differential Revision: https://reviews.llvm.org/D59517
llvm-svn: 356946
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp 10
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h 73
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 50
3 files changed, 101 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
index 1870c9ca5fa..4d9f08b3af0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
AgentSSID = CTX.getOrInsertSyncScopeID("agent");
WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
+ SystemOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("one-as");
+ AgentOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("agent-one-as");
+ WorkgroupOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("workgroup-one-as");
+ WavefrontOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("wavefront-one-as");
+ SingleThreadOneAddressSpaceSSID =
+ CTX.getOrInsertSyncScopeID("singlethread-one-as");
}
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
index e6d4268a66e..2b0b8b42acf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -29,12 +29,22 @@ private:
// All supported memory/synchronization scopes can be found here:
// http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
- /// Agent synchronization scope ID.
+ /// Agent synchronization scope ID (cross address space).
SyncScope::ID AgentSSID;
- /// Workgroup synchronization scope ID.
+ /// Workgroup synchronization scope ID (cross address space).
SyncScope::ID WorkgroupSSID;
- /// Wavefront synchronization scope ID.
+ /// Wavefront synchronization scope ID (cross address space).
SyncScope::ID WavefrontSSID;
+ /// System synchronization scope ID (single address space).
+ SyncScope::ID SystemOneAddressSpaceSSID;
+ /// Agent synchronization scope ID (single address space).
+ SyncScope::ID AgentOneAddressSpaceSSID;
+ /// Workgroup synchronization scope ID (single address space).
+ SyncScope::ID WorkgroupOneAddressSpaceSSID;
+ /// Wavefront synchronization scope ID (single address space).
+ SyncScope::ID WavefrontOneAddressSpaceSSID;
+ /// Single thread synchronization scope ID (single address space).
+ SyncScope::ID SingleThreadOneAddressSpaceSSID;
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@@ -43,35 +53,70 @@ private:
/// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
/// supported by the AMDGPU target.
Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
- if (SSID == SyncScope::SingleThread)
+ if (SSID == SyncScope::SingleThread ||
+ SSID == getSingleThreadOneAddressSpaceSSID())
return 0;
- else if (SSID == getWavefrontSSID())
+ else if (SSID == getWavefrontSSID() ||
+ SSID == getWavefrontOneAddressSpaceSSID())
return 1;
- else if (SSID == getWorkgroupSSID())
+ else if (SSID == getWorkgroupSSID() ||
+ SSID == getWorkgroupOneAddressSpaceSSID())
return 2;
- else if (SSID == getAgentSSID())
+ else if (SSID == getAgentSSID() ||
+ SSID == getAgentOneAddressSpaceSSID())
return 3;
- else if (SSID == SyncScope::System)
+ else if (SSID == SyncScope::System ||
+ SSID == getSystemOneAddressSpaceSSID())
return 4;
return None;
}
+ /// \returns True if \p SSID is restricted to single address space, false
+ /// otherwise
+ bool isOneAddressSpace(SyncScope::ID SSID) const {
+ return SSID == getSingleThreadOneAddressSpaceSSID() ||
+ SSID == getWavefrontOneAddressSpaceSSID() ||
+ SSID == getWorkgroupOneAddressSpaceSSID() ||
+ SSID == getAgentOneAddressSpaceSSID() ||
+ SSID == getSystemOneAddressSpaceSSID();
+ }
+
public:
AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
- /// \returns Agent synchronization scope ID.
+ /// \returns Agent synchronization scope ID (cross address space).
SyncScope::ID getAgentSSID() const {
return AgentSSID;
}
- /// \returns Workgroup synchronization scope ID.
+ /// \returns Workgroup synchronization scope ID (cross address space).
SyncScope::ID getWorkgroupSSID() const {
return WorkgroupSSID;
}
- /// \returns Wavefront synchronization scope ID.
+ /// \returns Wavefront synchronization scope ID (cross address space).
SyncScope::ID getWavefrontSSID() const {
return WavefrontSSID;
}
+ /// \returns System synchronization scope ID (single address space).
+ SyncScope::ID getSystemOneAddressSpaceSSID() const {
+ return SystemOneAddressSpaceSSID;
+ }
+ /// \returns Agent synchronization scope ID (single address space).
+ SyncScope::ID getAgentOneAddressSpaceSSID() const {
+ return AgentOneAddressSpaceSSID;
+ }
+ /// \returns Workgroup synchronization scope ID (single address space).
+ SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
+ return WorkgroupOneAddressSpaceSSID;
+ }
+ /// \returns Wavefront synchronization scope ID (single address space).
+ SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
+ return WavefrontOneAddressSpaceSSID;
+ }
+ /// \returns Single thread synchronization scope ID (single address space).
+ SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
+ return SingleThreadOneAddressSpaceSSID;
+ }
/// In AMDGPU target synchronization scopes are inclusive, meaning a
/// larger synchronization scope is inclusive of a smaller synchronization
@@ -87,7 +132,11 @@ public:
if (!AIO || !BIO)
return None;
- return AIO.getValue() > BIO.getValue();
+ bool IsAOneAddressSpace = isOneAddressSpace(A);
+ bool IsBOneAddressSpace = isOneAddressSpace(B);
+
+ return AIO.getValue() >= BIO.getValue() &&
+ (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
}
};
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 1080332d6e4..ba7ca691f2a 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
SIAtomicAddrSpace InstrScope) const {
- /// TODO: For now assume OpenCL memory model which treats each
- /// address space as having a separate happens-before relation, and
- /// so an instruction only has ordering with respect to the address
- /// space it accesses, and if it accesses multiple address spaces it
- /// does not require ordering of operations in different address
- /// spaces.
- if (SSID == SyncScope::System)
+ if (SSID == SyncScope::System)
+ return std::make_tuple(SIAtomicScope::SYSTEM,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getAgentSSID())
+ return std::make_tuple(SIAtomicScope::AGENT,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getWorkgroupSSID())
+ return std::make_tuple(SIAtomicScope::WORKGROUP,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getWavefrontSSID())
+ return std::make_tuple(SIAtomicScope::WAVEFRONT,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == SyncScope::SingleThread)
+ return std::make_tuple(SIAtomicScope::SINGLETHREAD,
+ SIAtomicAddrSpace::ATOMIC,
+ true);
+ if (SSID == MMI->getSystemOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SYSTEM,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getAgentSSID())
+ if (SSID == MMI->getAgentOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::AGENT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getWorkgroupSSID())
+ if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WORKGROUP,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == MMI->getWavefrontSSID())
+ if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::WAVEFRONT,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- if (SSID == SyncScope::SingleThread)
+ if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
return std::make_tuple(SIAtomicScope::SINGLETHREAD,
SIAtomicAddrSpace::ATOMIC & InstrScope,
false);
- /// TODO: To support HSA Memory Model need to add additional memory
- /// scopes that specify that do require cross address space
- /// ordering.
return None;
}
@@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
bool VMCnt = false;
bool LGKMCnt = false;
- bool EXPCnt = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- VMCnt = true;
+ VMCnt |= true;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/GDS memory as LDS operations
// could be reordered with respect to later global/GDS memory
// operations of the same wave.
- LGKMCnt = IsCrossAddrSpaceOrdering;
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// also synchronizing with global/LDS memory as GDS operations
// could be reordered with respect to later global/LDS memory
// operations of the same wave.
- EXPCnt = IsCrossAddrSpaceOrdering;
+ LGKMCnt |= IsCrossAddrSpaceOrdering;
break;
case SIAtomicScope::WORKGROUP:
case SIAtomicScope::WAVEFRONT:
@@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
}
}
- if (VMCnt || LGKMCnt || EXPCnt) {
+ if (VMCnt || LGKMCnt) {
unsigned WaitCntImmediate =
AMDGPU::encodeWaitcnt(IV,
VMCnt ? 0 : getVmcntBitMask(IV),
- EXPCnt ? 0 : getExpcntBitMask(IV),
+ getExpcntBitMask(IV),
LGKMCnt ? 0 : getLgkmcntBitMask(IV));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
Changed = true;
OpenPOWER on IntegriCloud