AMDGPU: Add support for cross address space synchronization scopes

Differential Revision: https://reviews.llvm.org/D59517
llvm-svn: 356946
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2019-03-25 20:50:21 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2019-03-25 20:50:21 +0000
commit: 51809cbc98ce187608bd5f2c46d18c5ffb7b6031 (patch)
tree: 4e164de16c93595c2d56d94635b14621ab92ec4d /llvm/lib/Target/AMDGPU
parent: 65bd5d8aa3b72f9db6e6c0c8f7a66b8bd9e61bb8 (diff)
download: bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.tar.gz
bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.zip
3 files changed, 101 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
index 1870c9ca5fa..4d9f08b3af0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.cpp
@@ -23,6 +23,16 @@ AMDGPUMachineModuleInfo::AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI)
   AgentSSID = CTX.getOrInsertSyncScopeID("agent");
   WorkgroupSSID = CTX.getOrInsertSyncScopeID("workgroup");
   WavefrontSSID = CTX.getOrInsertSyncScopeID("wavefront");
+  SystemOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("one-as");
+  AgentOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("agent-one-as");
+  WorkgroupOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("workgroup-one-as");
+  WavefrontOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("wavefront-one-as");
+  SingleThreadOneAddressSpaceSSID =
+      CTX.getOrInsertSyncScopeID("singlethread-one-as");
 }
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
index e6d4268a66e..2b0b8b42acf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineModuleInfo.h
@@ -29,12 +29,22 @@ private:
   // All supported memory/synchronization scopes can be found here:
   //   http://llvm.org/docs/AMDGPUUsage.html#memory-scopes
 
-  /// Agent synchronization scope ID.
+  /// Agent synchronization scope ID (cross address space).
   SyncScope::ID AgentSSID;
-  /// Workgroup synchronization scope ID.
+  /// Workgroup synchronization scope ID (cross address space).
   SyncScope::ID WorkgroupSSID;
-  /// Wavefront synchronization scope ID.
+  /// Wavefront synchronization scope ID (cross address space).
   SyncScope::ID WavefrontSSID;
+  /// System synchronization scope ID (single address space).
+  SyncScope::ID SystemOneAddressSpaceSSID;
+  /// Agent synchronization scope ID (single address space).
+  SyncScope::ID AgentOneAddressSpaceSSID;
+  /// Workgroup synchronization scope ID (single address space).
+  SyncScope::ID WorkgroupOneAddressSpaceSSID;
+  /// Wavefront synchronization scope ID (single address space).
+  SyncScope::ID WavefrontOneAddressSpaceSSID;
+  /// Single thread synchronization scope ID (single address space).
+  SyncScope::ID SingleThreadOneAddressSpaceSSID;
 
   /// In AMDGPU target synchronization scopes are inclusive, meaning a
   /// larger synchronization scope is inclusive of a smaller synchronization
@@ -43,35 +53,70 @@ private:
   /// \returns \p SSID's inclusion ordering, or "None" if \p SSID is not
   /// supported by the AMDGPU target.
   Optional<uint8_t> getSyncScopeInclusionOrdering(SyncScope::ID SSID) const {
-    if (SSID == SyncScope::SingleThread)
+    if (SSID == SyncScope::SingleThread ||
+        SSID == getSingleThreadOneAddressSpaceSSID())
       return 0;
-    else if (SSID == getWavefrontSSID())
+    else if (SSID == getWavefrontSSID() ||
+             SSID == getWavefrontOneAddressSpaceSSID())
       return 1;
-    else if (SSID == getWorkgroupSSID())
+    else if (SSID == getWorkgroupSSID() ||
+             SSID == getWorkgroupOneAddressSpaceSSID())
       return 2;
-    else if (SSID == getAgentSSID())
+    else if (SSID == getAgentSSID() ||
+             SSID == getAgentOneAddressSpaceSSID())
       return 3;
-    else if (SSID == SyncScope::System)
+    else if (SSID == SyncScope::System ||
+             SSID == getSystemOneAddressSpaceSSID())
       return 4;
 
     return None;
   }
 
+  /// \returns True if \p SSID is restricted to single address space, false
+  /// otherwise
+  bool isOneAddressSpace(SyncScope::ID SSID) const {
+    return SSID == getSingleThreadOneAddressSpaceSSID() ||
+        SSID == getWavefrontOneAddressSpaceSSID() ||
+        SSID == getWorkgroupOneAddressSpaceSSID() ||
+        SSID == getAgentOneAddressSpaceSSID() ||
+        SSID == getSystemOneAddressSpaceSSID();
+  }
+
 public:
   AMDGPUMachineModuleInfo(const MachineModuleInfo &MMI);
 
-  /// \returns Agent synchronization scope ID.
+  /// \returns Agent synchronization scope ID (cross address space).
   SyncScope::ID getAgentSSID() const {
     return AgentSSID;
   }
-  /// \returns Workgroup synchronization scope ID.
+  /// \returns Workgroup synchronization scope ID (cross address space).
   SyncScope::ID getWorkgroupSSID() const {
     return WorkgroupSSID;
   }
-  /// \returns Wavefront synchronization scope ID.
+  /// \returns Wavefront synchronization scope ID (cross address space).
   SyncScope::ID getWavefrontSSID() const {
     return WavefrontSSID;
   }
+  /// \returns System synchronization scope ID (single address space).
+  SyncScope::ID getSystemOneAddressSpaceSSID() const {
+    return SystemOneAddressSpaceSSID;
+  }
+  /// \returns Agent synchronization scope ID (single address space).
+  SyncScope::ID getAgentOneAddressSpaceSSID() const {
+    return AgentOneAddressSpaceSSID;
+  }
+  /// \returns Workgroup synchronization scope ID (single address space).
+  SyncScope::ID getWorkgroupOneAddressSpaceSSID() const {
+    return WorkgroupOneAddressSpaceSSID;
+  }
+  /// \returns Wavefront synchronization scope ID (single address space).
+  SyncScope::ID getWavefrontOneAddressSpaceSSID() const {
+    return WavefrontOneAddressSpaceSSID;
+  }
+  /// \returns Single thread synchronization scope ID (single address space).
+  SyncScope::ID getSingleThreadOneAddressSpaceSSID() const {
+    return SingleThreadOneAddressSpaceSSID;
+  }
 
   /// In AMDGPU target synchronization scopes are inclusive, meaning a
   /// larger synchronization scope is inclusive of a smaller synchronization
@@ -87,7 +132,11 @@ public:
     if (!AIO || !BIO)
       return None;
 
-    return AIO.getValue() > BIO.getValue();
+    bool IsAOneAddressSpace = isOneAddressSpace(A);
+    bool IsBOneAddressSpace = isOneAddressSpace(B);
+
+    return AIO.getValue() >= BIO.getValue() &&
+        (IsAOneAddressSpace == IsBOneAddressSpace || !IsAOneAddressSpace);
   }
 };
 
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 1080332d6e4..ba7ca691f2a 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -417,35 +417,46 @@ void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
 Optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
 SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                                SIAtomicAddrSpace InstrScope) const {
-  /// TODO: For now assume OpenCL memory model which treats each
-  /// address space as having a separate happens-before relation, and
-  /// so an instruction only has ordering with respect to the address
-  /// space it accesses, and if it accesses multiple address spaces it
-  /// does not require ordering of operations in different address
-  /// spaces.
- if (SSID == SyncScope::System)
+  if (SSID == SyncScope::System)
+    return std::make_tuple(SIAtomicScope::SYSTEM,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getAgentSSID())
+    return std::make_tuple(SIAtomicScope::AGENT,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getWorkgroupSSID())
+    return std::make_tuple(SIAtomicScope::WORKGROUP,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getWavefrontSSID())
+    return std::make_tuple(SIAtomicScope::WAVEFRONT,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == SyncScope::SingleThread)
+    return std::make_tuple(SIAtomicScope::SINGLETHREAD,
+                           SIAtomicAddrSpace::ATOMIC,
+                           true);
+  if (SSID == MMI->getSystemOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SYSTEM,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == MMI->getAgentSSID())
+  if (SSID == MMI->getAgentOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::AGENT,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == MMI->getWorkgroupSSID())
+  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WORKGROUP,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == MMI->getWavefrontSSID())
+  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::WAVEFRONT,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  if (SSID == SyncScope::SingleThread)
+  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
     return std::make_tuple(SIAtomicScope::SINGLETHREAD,
                            SIAtomicAddrSpace::ATOMIC & InstrScope,
                            false);
-  /// TODO: To support HSA Memory Model need to add additional memory
-  /// scopes that specify that do require cross address space
-  /// ordering.
   return None;
 }
 
@@ -721,13 +732,12 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
 
   bool VMCnt = false;
   bool LGKMCnt = false;
-  bool EXPCnt = false;
 
   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
     switch (Scope) {
     case SIAtomicScope::SYSTEM:
     case SIAtomicScope::AGENT:
-      VMCnt = true;
+      VMCnt |= true;
       break;
     case SIAtomicScope::WORKGROUP:
     case SIAtomicScope::WAVEFRONT:
@@ -751,7 +761,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
       // also synchronizing with global/GDS memory as LDS operations
       // could be reordered with respect to later global/GDS memory
       // operations of the same wave.
-      LGKMCnt = IsCrossAddrSpaceOrdering;
+      LGKMCnt |= IsCrossAddrSpaceOrdering;
       break;
     case SIAtomicScope::WAVEFRONT:
     case SIAtomicScope::SINGLETHREAD:
@@ -773,7 +783,7 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
       // also synchronizing with global/LDS memory as GDS operations
       // could be reordered with respect to later global/LDS memory
       // operations of the same wave.
-      EXPCnt = IsCrossAddrSpaceOrdering;
+      LGKMCnt |= IsCrossAddrSpaceOrdering;
       break;
     case SIAtomicScope::WORKGROUP:
     case SIAtomicScope::WAVEFRONT:
@@ -786,11 +796,11 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
     }
   }
 
-  if (VMCnt || LGKMCnt || EXPCnt) {
+  if (VMCnt || LGKMCnt) {
     unsigned WaitCntImmediate =
       AMDGPU::encodeWaitcnt(IV,
                             VMCnt ? 0 : getVmcntBitMask(IV),
-                            EXPCnt ? 0 : getExpcntBitMask(IV),
+                            getExpcntBitMask(IV),
                             LGKMCnt ? 0 : getLgkmcntBitMask(IV));
     BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(WaitCntImmediate);
     Changed = true;
author	Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>	2019-03-25 20:50:21 +0000
committer	Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>	2019-03-25 20:50:21 +0000
commit	51809cbc98ce187608bd5f2c46d18c5ffb7b6031 (patch)
tree	4e164de16c93595c2d56d94635b14621ab92ec4d /llvm/lib/Target/AMDGPU
parent	65bd5d8aa3b72f9db6e6c0c8f7a66b8bd9e61bb8 (diff)
download	bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.tar.gz bcm5719-llvm-51809cbc98ce187608bd5f2c46d18c5ffb7b6031.zip