diff options
author | Neil Henning <neil.henning@amd.com> | 2018-12-10 16:35:53 +0000 |
---|---|---|
committer | Neil Henning <neil.henning@amd.com> | 2018-12-10 16:35:53 +0000 |
commit | e448351b77145a00372b675edba8f3022283e25e (patch) | |
tree | cc7d41d4703aebba1025a995017afe36c34d9300 | |
parent | 2b26a98a0d1c03e2196f37f0853141d2e4a5de6b (diff) | |
download | bcm5719-llvm-e448351b77145a00372b675edba8f3022283e25e.tar.gz bcm5719-llvm-e448351b77145a00372b675edba8f3022283e25e.zip |
[AMDGPU] Change the L1 flush instruction for AMDPAL/MESA3D.
This commit changes which L1 flush instruction is used for AMDPAL and
MESA3D workloads so that the entire L1 cache is flushed instead of just the
volatile lines.
Differential Revision: https://reviews.llvm.org/D55367
llvm-svn: 348771
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll | 222 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll | 222 |
3 files changed, 451 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 6fa52f57601..b4a4e9e3313 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -812,6 +812,12 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI, MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); + const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>(); + + const unsigned Flush = STM.isAmdPalOS() || STM.isMesa3DOS() + ? AMDGPU::BUFFER_WBINVL1 + : AMDGPU::BUFFER_WBINVL1_VOL; + if (Pos == Position::AFTER) ++MI; @@ -819,7 +825,7 @@ bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI, switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: - BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL)); + BuildMI(MBB, MI, DL, TII->get(Flush)); Changed = true; break; case SIAtomicScope::WORKGROUP: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll new file mode 100644 index 00000000000..f692c763c0b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll @@ -0,0 +1,222 @@ +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s + +; FUNC-LABEL: {{^}}system_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_acquire() { +entry: + fence acquire + ret void +} + +; FUNC-LABEL: {{^}}system_release: +; GCN: 
%bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_release() { +entry: + fence release + ret void +} + +; FUNC-LABEL: {{^}}system_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_acq_rel() { +entry: + fence acq_rel + ret void +} + +; FUNC-LABEL: {{^}}system_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_seq_cst() { +entry: + fence seq_cst + ret void +} + +; FUNC-LABEL: {{^}}singlethread_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_acquire() { +entry: + fence syncscope("singlethread") acquire + ret void +} + +; FUNC-LABEL: {{^}}singlethread_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_release() { +entry: + fence syncscope("singlethread") release + ret void +} + +; FUNC-LABEL: {{^}}singlethread_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_acq_rel() { +entry: + fence syncscope("singlethread") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}singlethread_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_seq_cst() { +entry: + fence syncscope("singlethread") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}agent_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_acquire() { +entry: + fence syncscope("agent") acquire + ret void +} + +; FUNC-LABEL: {{^}}agent_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_release() { +entry: + fence syncscope("agent") release + ret void +} 
+ +; FUNC-LABEL: {{^}}agent_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_acq_rel() { +entry: + fence syncscope("agent") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}agent_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_seq_cst() { +entry: + fence syncscope("agent") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}workgroup_acquire: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_acquire() { +entry: + fence syncscope("workgroup") acquire + ret void +} + +; FUNC-LABEL: {{^}}workgroup_release: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_release() { +entry: + fence syncscope("workgroup") release + ret void +} + +; FUNC-LABEL: {{^}}workgroup_acq_rel: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_acq_rel() { +entry: + fence syncscope("workgroup") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}workgroup_seq_cst: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_seq_cst() { +entry: + fence syncscope("workgroup") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}wavefront_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_acquire() { +entry: + fence syncscope("wavefront") acquire + ret void +} + +; FUNC-LABEL: {{^}}wavefront_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_release() { +entry: + fence syncscope("wavefront") release + ret void +} + +; FUNC-LABEL: {{^}}wavefront_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: 
s_endpgm +define amdgpu_kernel void @wavefront_acq_rel() { +entry: + fence syncscope("wavefront") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}wavefront_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_seq_cst() { +entry: + fence syncscope("wavefront") seq_cst + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll new file mode 100644 index 00000000000..609dc0400c4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-mesa3d.ll @@ -0,0 +1,222 @@ +; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s + +; FUNC-LABEL: {{^}}system_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_acquire() { +entry: + fence acquire + ret void +} + +; FUNC-LABEL: {{^}}system_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_release() { +entry: + fence release + ret void +} + +; FUNC-LABEL: {{^}}system_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_acq_rel() { +entry: + fence acq_rel + ret void +} + +; FUNC-LABEL: {{^}}system_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @system_seq_cst() { +entry: + fence seq_cst + ret void +} + +; 
FUNC-LABEL: {{^}}singlethread_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_acquire() { +entry: + fence syncscope("singlethread") acquire + ret void +} + +; FUNC-LABEL: {{^}}singlethread_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_release() { +entry: + fence syncscope("singlethread") release + ret void +} + +; FUNC-LABEL: {{^}}singlethread_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_acq_rel() { +entry: + fence syncscope("singlethread") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}singlethread_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @singlethread_seq_cst() { +entry: + fence syncscope("singlethread") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}agent_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_acquire() { +entry: + fence syncscope("agent") acquire + ret void +} + +; FUNC-LABEL: {{^}}agent_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_release() { +entry: + fence syncscope("agent") release + ret void +} + +; FUNC-LABEL: {{^}}agent_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_acq_rel() { +entry: + fence syncscope("agent") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}agent_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN: buffer_wbinvl1{{$}} +; GCN: s_endpgm +define amdgpu_kernel void @agent_seq_cst() { +entry: + fence syncscope("agent") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}workgroup_acquire: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm 
+define amdgpu_kernel void @workgroup_acquire() { +entry: + fence syncscope("workgroup") acquire + ret void +} + +; FUNC-LABEL: {{^}}workgroup_release: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_release() { +entry: + fence syncscope("workgroup") release + ret void +} + +; FUNC-LABEL: {{^}}workgroup_acq_rel: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_acq_rel() { +entry: + fence syncscope("workgroup") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}workgroup_seq_cst: +; GCN: %bb.0 +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @workgroup_seq_cst() { +entry: + fence syncscope("workgroup") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}wavefront_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_acquire() { +entry: + fence syncscope("wavefront") acquire + ret void +} + +; FUNC-LABEL: {{^}}wavefront_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_release() { +entry: + fence syncscope("wavefront") release + ret void +} + +; FUNC-LABEL: {{^}}wavefront_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_acq_rel() { +entry: + fence syncscope("wavefront") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}wavefront_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +define amdgpu_kernel void @wavefront_seq_cst() { +entry: + fence syncscope("wavefront") seq_cst + ret void +} |