summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll 222
-rw-r--r-- llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll 222
3 files changed, 451 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 6fa52f57601..b4a4e9e3313 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -812,6 +812,12 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
+ const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
+
+ const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS()
+ ? AMDGPU::BUFFER_WBINVL1
+ : AMDGPU::BUFFER_WBINVL1_VOL;
+
if (Pos == Position::AFTER)
++MI;
@@ -819,7 +825,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
+ BuildMI(MBB, MI, DL, TII->get(Flush));
Changed = true;
break;
case SIAtomicScope::WORKGROUP:
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
new file mode 100644
index 00000000000..f692c763c0b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
@@ -0,0 +1,222 @@
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+
+; FUNC-LABEL: {{^}}system_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_acquire() {
+entry:
+ fence acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_release() {
+entry:
+ fence release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_acq_rel() {
+entry:
+ fence acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_seq_cst() {
+entry:
+ fence seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_acquire() {
+entry:
+ fence syncscope("singlethread") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_release() {
+entry:
+ fence syncscope("singlethread") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_acq_rel() {
+entry:
+ fence syncscope("singlethread") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_seq_cst() {
+entry:
+ fence syncscope("singlethread") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_acquire() {
+entry:
+ fence syncscope("agent") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_release() {
+entry:
+ fence syncscope("agent") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_acq_rel() {
+entry:
+ fence syncscope("agent") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_seq_cst() {
+entry:
+ fence syncscope("agent") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_acquire:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_acquire() {
+entry:
+ fence syncscope("workgroup") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_release:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_release() {
+entry:
+ fence syncscope("workgroup") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_acq_rel() {
+entry:
+ fence syncscope("workgroup") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_seq_cst() {
+entry:
+ fence syncscope("workgroup") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_acquire() {
+entry:
+ fence syncscope("wavefront") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_release() {
+entry:
+ fence syncscope("wavefront") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_acq_rel() {
+entry:
+ fence syncscope("wavefront") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_seq_cst() {
+entry:
+ fence syncscope("wavefront") seq_cst
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll
new file mode 100644
index 00000000000..609dc0400c4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll
@@ -0,0 +1,222 @@
+; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s
+
+; FUNC-LABEL: {{^}}system_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_acquire() {
+entry:
+ fence acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_release() {
+entry:
+ fence release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_acq_rel() {
+entry:
+ fence acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}system_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @system_seq_cst() {
+entry:
+ fence seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_acquire() {
+entry:
+ fence syncscope("singlethread") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_release() {
+entry:
+ fence syncscope("singlethread") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_acq_rel() {
+entry:
+ fence syncscope("singlethread") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}singlethread_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @singlethread_seq_cst() {
+entry:
+ fence syncscope("singlethread") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN-NEXT: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_acquire() {
+entry:
+ fence syncscope("agent") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_release() {
+entry:
+ fence syncscope("agent") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_acq_rel() {
+entry:
+ fence syncscope("agent") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}agent_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_waitcnt vmcnt(0){{$}}
+; GCN: buffer_wbinvl1{{$}}
+; GCN: s_endpgm
+define amdgpu_kernel void @agent_seq_cst() {
+entry:
+ fence syncscope("agent") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_acquire:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_acquire() {
+entry:
+ fence syncscope("workgroup") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_release:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_release() {
+entry:
+ fence syncscope("workgroup") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_acq_rel() {
+entry:
+ fence syncscope("workgroup") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}workgroup_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @workgroup_seq_cst() {
+entry:
+ fence syncscope("workgroup") seq_cst
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_acquire:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_acquire() {
+entry:
+ fence syncscope("wavefront") acquire
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_release:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_release() {
+entry:
+ fence syncscope("wavefront") release
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_acq_rel:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_acq_rel() {
+entry:
+ fence syncscope("wavefront") acq_rel
+ ret void
+}
+
+; FUNC-LABEL: {{^}}wavefront_seq_cst:
+; GCN: %bb.0
+; GCN-NOT: ATOMIC_FENCE
+; GCN: s_endpgm
+define amdgpu_kernel void @wavefront_seq_cst() {
+entry:
+ fence syncscope("wavefront") seq_cst
+ ret void
+}
OpenPOWER on IntegriCloud