summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll | 5
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll | 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/fix-wwm-liveness.mir | 185
-rw-r--r-- llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll | 3
-rw-r--r-- llvm/test/CodeGen/AMDGPU/wqm.mir | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll | 188
9 files changed, 197 insertions, 206 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
index 9a22780b9c4..88824aa64ee 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
@@ -112,7 +112,7 @@ entry:
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
-; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
+; GFX8MORE: v_mov_b32_dpp v[[wave_shr1:[0-9]+]], v{{[0-9]+}} wave_shr:1 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:1 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:2 row_mask:0xf bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[wave_shr1]] row_shr:3 row_mask:0xf bank_mask:0xf
@@ -120,8 +120,7 @@ entry:
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_shr:8 row_mask:0xf bank_mask:0xc
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:15 row_mask:0xa bank_mask:0xf
; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
-; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
-; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: buffer_atomic_sub v[[value]]
define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 201eac172c2..969eb3cfa7a 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7LESS %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8MORE %s
declare i32 @llvm.amdgcn.workitem.id.x()
@@ -133,9 +133,7 @@ entry:
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
-; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
-; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
-; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: buffer_atomic_sub v[[value]]
define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out, i32 addrspace(1)* %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 94c6ef85436..3ce91e83cf3 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -136,9 +136,7 @@ entry:
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
-; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
-; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
index f7980cc8691..c2db5547201 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
@@ -104,9 +104,7 @@ entry:
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
-; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
-; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
-; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: buffer_atomic_sub v[[value]]
define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
index 47fed39cdec..eb3f0ab17ac 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
@@ -117,9 +117,7 @@ entry:
; GFX7LESS-NOT: v_mbcnt_hi_u32_b32
; GFX7LESS-NOT: s_bcnt1_i32_b64
; GFX7LESS: buffer_atomic_sub v{{[0-9]+}}
-; GFX8MORE: v_mov_b32_dpp v{{[0-9]+}}, v[[sub_value:[0-9]+]] wave_shr:1 row_mask:0xf bank_mask:0xf
-; GFX8MORE: v_sub_u32_e32 v[[sub_value]],{{( vcc,)?}} v[[sub_value]], v{{[0-9]+}}
-; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v[[sub_value]], 63
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
; GFX8MORE: buffer_atomic_sub v[[value]]
define amdgpu_kernel void @sub_i32_varying_vdata(i32 addrspace(1)* %out, <4 x i32> %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/fix-wwm-liveness.mir b/llvm/test/CodeGen/AMDGPU/fix-wwm-liveness.mir
deleted file mode 100644
index dcddeb71b57..00000000000
--- a/llvm/test/CodeGen/AMDGPU/fix-wwm-liveness.mir
+++ /dev/null
@@ -1,185 +0,0 @@
-# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fix-wwm-liveness -o - %s | FileCheck %s
-
-# Test a then phi value.
-#CHECK: test_wwm_liveness_then_phi
-#CHECK: %21:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit %21
-
----
-name: test_wwm_liveness_then_phi
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-tracksRegLiveness: true
-registers:
- - { id: 0, class: sreg_64, preferred-register: '' }
- - { id: 1, class: sgpr_32, preferred-register: '' }
- - { id: 2, class: sgpr_32, preferred-register: '' }
- - { id: 3, class: vgpr_32, preferred-register: '' }
- - { id: 4, class: vgpr_32, preferred-register: '' }
- - { id: 5, class: vgpr_32, preferred-register: '' }
- - { id: 6, class: vgpr_32, preferred-register: '' }
- - { id: 7, class: vgpr_32, preferred-register: '' }
- - { id: 8, class: sreg_64, preferred-register: '$vcc' }
- - { id: 9, class: sreg_64, preferred-register: '' }
- - { id: 10, class: sreg_32_xm0, preferred-register: '' }
- - { id: 11, class: sreg_64, preferred-register: '' }
- - { id: 12, class: sreg_32_xm0, preferred-register: '' }
- - { id: 13, class: sreg_32_xm0, preferred-register: '' }
- - { id: 14, class: sreg_32_xm0, preferred-register: '' }
- - { id: 15, class: sreg_128, preferred-register: '' }
- - { id: 16, class: vgpr_32, preferred-register: '' }
- - { id: 17, class: vgpr_32, preferred-register: '' }
- - { id: 18, class: vgpr_32, preferred-register: '' }
- - { id: 19, class: sreg_64, preferred-register: '' }
- - { id: 20, class: sreg_64, preferred-register: '' }
- - { id: 21, class: vgpr_32, preferred-register: '' }
- - { id: 22, class: sreg_64, preferred-register: '' }
- - { id: 23, class: sreg_64, preferred-register: '' }
-liveins:
-body: |
- bb.0:
- successors: %bb.1(0x40000000), %bb.2(0x40000000)
-
- %21 = V_MOV_B32_e32 0, implicit $exec
- %5 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
- %6 = V_MBCNT_HI_U32_B32_e32 -1, killed %5, implicit $exec
- %8 = V_CMP_GT_U32_e64 32, killed %6, implicit $exec
- %22 = COPY $exec, implicit-def $exec
- %23 = S_AND_B64 %22, %8, implicit-def dead $scc
- %0 = S_XOR_B64 %23, %22, implicit-def dead $scc
- $exec = S_MOV_B64_term killed %23
- SI_MASK_BRANCH %bb.2, implicit $exec
- S_BRANCH %bb.1
-
- bb.1:
- successors: %bb.2(0x80000000)
-
- %13 = S_MOV_B32 61440
- %14 = S_MOV_B32 -1
- %15 = REG_SEQUENCE undef %12, 1, undef %10, 2, killed %14, 3, killed %13, 4
- %19 = COPY $exec
- $exec = S_MOV_B64 -1
- %16 = BUFFER_LOAD_DWORD_OFFSET %15, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4)
- %17 = V_ADD_F32_e32 1065353216, killed %16, implicit $exec
- $exec = EXIT_WWM killed %19
- %21 = V_MOV_B32_e32 1, implicit $exec
- early-clobber %18 = WWM killed %17, implicit $exec
- BUFFER_STORE_DWORD_OFFSET killed %18, killed %15, 0, 0, 0, 0, 0, implicit $exec :: (store 4)
-
- bb.2:
- $exec = S_OR_B64 $exec, killed %0, implicit-def $scc
- $vgpr0 = COPY killed %21
- SI_RETURN_TO_EPILOG killed $vgpr0
-
-...
-
-# Test a loop with a loop exit value and a loop phi.
-#CHECK: test_wwm_liveness_loop
-#CHECK: %4:vgpr_32 = IMPLICIT_DEF
-#CHECK: bb.1:
-#CHECK: %4:vgpr_32 = FLAT_LOAD_DWORD{{.*}}, implicit %4
-#CHECK: %27:vgpr_32 = COPY killed %21, implicit %27
-
----
-name: test_wwm_liveness_loop
-alignment: 0
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-registers:
- - { id: 0, class: vgpr_32, preferred-register: '' }
- - { id: 1, class: sreg_32_xm0, preferred-register: '' }
- - { id: 2, class: sreg_64, preferred-register: '' }
- - { id: 3, class: sreg_32_xm0, preferred-register: '' }
- - { id: 4, class: vgpr_32, preferred-register: '' }
- - { id: 5, class: sreg_32_xm0, preferred-register: '' }
- - { id: 6, class: sreg_64, preferred-register: '' }
- - { id: 7, class: sreg_64, preferred-register: '' }
- - { id: 8, class: sreg_64, preferred-register: '' }
- - { id: 9, class: vreg_64, preferred-register: '' }
- - { id: 10, class: vgpr_32, preferred-register: '' }
- - { id: 11, class: vgpr_32, preferred-register: '' }
- - { id: 12, class: vgpr_32, preferred-register: '' }
- - { id: 13, class: sreg_64, preferred-register: '' }
- - { id: 14, class: vreg_64, preferred-register: '' }
- - { id: 15, class: sreg_32_xm0, preferred-register: '' }
- - { id: 16, class: vgpr_32, preferred-register: '' }
- - { id: 17, class: sreg_64, preferred-register: '$vcc' }
- - { id: 18, class: vgpr_32, preferred-register: '' }
- - { id: 19, class: vgpr_32, preferred-register: '' }
- - { id: 20, class: vgpr_32, preferred-register: '' }
- - { id: 21, class: vgpr_32, preferred-register: '' }
- - { id: 22, class: vgpr_32, preferred-register: '' }
- - { id: 23, class: sreg_64, preferred-register: '' }
- - { id: 24, class: sreg_64, preferred-register: '' }
- - { id: 25, class: sreg_64, preferred-register: '' }
- - { id: 26, class: sreg_64, preferred-register: '' }
- - { id: 27, class: vgpr_32, preferred-register: '' }
-liveins:
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 0
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- maxCallFrameSize: 4294967295
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack:
-stack:
-constants:
-body: |
- bb.0:
- successors: %bb.1(0x80000000)
-
- %25:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- %0:vgpr_32 = FLAT_LOAD_DWORD undef %9:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
- $exec = EXIT_WWM killed %25
- %12:vgpr_32 = V_MBCNT_LO_U32_B32_e64 -1, 0, implicit $exec
- %7:sreg_64 = S_MOV_B64 0
- %26:sreg_64 = COPY killed %7
- %27:vgpr_32 = COPY killed %12
-
- bb.1:
- successors: %bb.2(0x04000000), %bb.1(0x7c000000)
-
- %24:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- %20:vgpr_32 = COPY killed %27
- %2:sreg_64 = COPY killed %26
- %4:vgpr_32 = FLAT_LOAD_DWORD undef %14:vreg_64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
- $exec = EXIT_WWM killed %24
- %22:vgpr_32 = V_ADD_I32_e32 -1, killed %20, implicit-def dead $vcc, implicit $exec
- %17:sreg_64 = V_CMP_EQ_U32_e64 0, %22, implicit $exec
- %6:sreg_64 = S_OR_B64 killed %17, killed %2, implicit-def $scc
- %21:vgpr_32 = COPY killed %22
- %26:sreg_64 = COPY %6
- %27:vgpr_32 = COPY killed %21
- $exec = S_ANDN2_B64_term $exec, %6, implicit-def $scc
- S_CBRANCH_EXECNZ %bb.1, implicit $exec
- S_BRANCH %bb.2
-
- bb.2:
- $exec = S_OR_B64 $exec, killed %6, implicit-def $scc
- %23:sreg_64 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- %18:vgpr_32 = V_ADD_F32_e32 killed %0, killed %4, implicit $exec
- $exec = EXIT_WWM killed %23
- early-clobber %19:vgpr_32 = COPY killed %18, implicit $exec
- $vgpr0 = COPY killed %19
- SI_RETURN_TO_EPILOG killed $vgpr0
-
-...
-
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index 358aa5f38ec..51047983104 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -81,7 +81,6 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.5, align 4, addrspace 5)
- ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
@@ -93,9 +92,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN: renamable $vgpr19 = COPY renamable $vgpr18
; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
- ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.6, align 4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
- ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 724c3a9fad6..6531d625866 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -3,7 +3,7 @@
---
# Check for awareness that s_or_saveexec_b64 clobbers SCC
#
-#CHECK: S_OR_SAVEEXEC_B64
+#CHECK: ENTER_WWM
#CHECK: S_CMP_LT_I32
#CHECK: S_CSELECT_B32
name: test_wwm_scc
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
new file mode 100644
index 00000000000..b6da84eb5be
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -0,0 +1,188 @@
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-O0 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-O3 %s
+
+define amdgpu_cs void @no_cfg(<4 x i32> inreg %tmp14) {
+ %tmp100 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %tmp14, i32 0, i32 0, i32 0)
+ %tmp101 = bitcast <2 x float> %tmp100 to <2 x i32>
+ %tmp102 = extractelement <2 x i32> %tmp101, i32 0
+ %tmp103 = extractelement <2 x i32> %tmp101, i32 1
+ %tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0)
+ %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0)
+
+; GFX9: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
+; GFX9: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
+; GFX9: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]]
+ %tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false)
+ %tmp121 = add i32 %tmp105, %tmp120
+ %tmp122 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp121)
+
+; GFX9: v_mov_b32_dpp v[[SECOND_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
+; GFX9: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]]
+; GFX9: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]]
+ %tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false)
+ %tmp136 = add i32 %tmp107, %tmp135
+ %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
+
+; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
+; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
+ %tmp138 = icmp eq i32 %tmp122, %tmp137
+ %tmp139 = sext i1 %tmp138 to i32
+ %tmp140 = shl nsw i32 %tmp139, 1
+ %tmp141 = and i32 %tmp140, 2
+ %tmp145 = bitcast i32 %tmp141 to float
+ call void @llvm.amdgcn.raw.buffer.store.f32(float %tmp145, <4 x i32> %tmp14, i32 4, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @cfg(<4 x i32> inreg %tmp14, i32 %arg) {
+entry:
+ %tmp100 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %tmp14, i32 0, i32 0, i32 0)
+ %tmp101 = bitcast <2 x float> %tmp100 to <2 x i32>
+ %tmp102 = extractelement <2 x i32> %tmp101, i32 0
+ %tmp105 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp102, i32 0)
+
+; GFX9: v_mov_b32_dpp v[[FIRST_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
+; GFX9: v_add_u32_e32 v[[FIRST_ADD:[0-9]+]], v{{[0-9]+}}, v[[FIRST_MOV]]
+; GFX9: v_mov_b32_e32 v[[FIRST:[0-9]+]], v[[FIRST_ADD]]
+; GFX9-O0: buffer_store_dword v[[FIRST]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, s[[FIRST_SGPR_OFFSET:[0-9]+]] offset:[[FIRST_IMM_OFFSET:[0-9]+]]
+ %tmp120 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp105, i32 323, i32 12, i32 15, i1 false)
+ %tmp121 = add i32 %tmp105, %tmp120
+ %tmp122 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp121)
+
+ %cond = icmp eq i32 %arg, 0
+ br i1 %cond, label %if, label %merge
+if:
+ %tmp103 = extractelement <2 x i32> %tmp101, i32 1
+ %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %tmp103, i32 0)
+
+; GFX9: v_mov_b32_dpp v[[SECOND_MOV:[0-9]+]], v{{[0-9]+}} row_bcast:31 row_mask:0xc bank_mask:0xf
+; GFX9: v_add_u32_e32 v[[SECOND_ADD:[0-9]+]], v{{[0-9]+}}, v[[SECOND_MOV]]
+; GFX9: v_mov_b32_e32 v[[SECOND:[0-9]+]], v[[SECOND_ADD]]
+; GFX9-O0: buffer_store_dword v[[SECOND]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, s[[SECOND_SGPR_OFFSET:[0-9]+]] offset:[[SECOND_IMM_OFFSET:[0-9]+]]
+ %tmp135 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp107, i32 323, i32 12, i32 15, i1 false)
+ %tmp136 = add i32 %tmp107, %tmp135
+ %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
+ br label %merge
+
+merge:
+ %merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ]
+; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]]
+; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, s[[SECOND_SGPR_OFFSET]] offset:[[SECOND_IMM_OFFSET]]
+; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, s[[FIRST_SGPR_OFFSET]] offset:[[FIRST_IMM_OFFSET]]
+; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]]
+ %tmp138 = icmp eq i32 %tmp122, %merge_value
+ %tmp139 = sext i1 %tmp138 to i32
+ %tmp140 = shl nsw i32 %tmp139, 1
+ %tmp141 = and i32 %tmp140, 2
+ %tmp145 = bitcast i32 %tmp141 to float
+ call void @llvm.amdgcn.raw.buffer.store.f32(float %tmp145, <4 x i32> %tmp14, i32 4, i32 0, i32 0)
+ ret void
+}
+
+define i32 @called(i32 %a) noinline {
+; GFX9: v_add_u32_e32 v1, v0, v0
+ %add = add i32 %a, %a
+; GFX9: v_mul_lo_i32 v0, v1, v0
+ %mul = mul i32 %add, %a
+; GFX9: v_sub_u32_e32 v0, v0, v1
+ %sub = sub i32 %mul, %add
+ ret i32 %sub
+}
+
+define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) {
+; GFX9-O0: v_mov_b32_e32 v0, s2
+; GFX9-O3: v_mov_b32_e32 v2, s0
+; GFX9-NEXT: s_not_b64 exec, exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3
+; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_not_b64 exec, exec
+ %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0)
+; GFX9-O0: buffer_store_dword v0
+; GFX9-O3: v_mov_b32_e32 v0, v2
+; GFX9: s_swappc_b64
+ %tmp134 = call i32 @called(i32 %tmp107)
+; GFX9-O0: buffer_load_dword v1
+; GFX9-O3: v_mov_b32_e32 v1, v0
+; GFX9-O0: v_add_u32_e32 v0, v0, v1
+; GFX9-O3: v_add_u32_e32 v1, v1, v2
+ %tmp136 = add i32 %tmp134, %tmp107
+ %tmp137 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp136)
+; GFX9-O0: buffer_store_dword v2
+; GFX9-O3: buffer_store_dword v0
+ call void @llvm.amdgcn.raw.buffer.store.i32(i32 %tmp137, <4 x i32> %tmp14, i32 4, i32 0, i32 0)
+ ret void
+}
+
+define i64 @called_i64(i64 %a) noinline {
+ %add = add i64 %a, %a
+ %mul = mul i64 %add, %a
+ %sub = sub i64 %mul, %add
+ ret i64 %sub
+}
+
+define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) {
+; GFX9-O0: v_mov_b32_e32 v0, s0
+; GFX9-O0: v_mov_b32_e32 v1, s1
+; GFX9-O3: v_mov_b32_e32 v7, s1
+; GFX9-O3: v_mov_b32_e32 v6, s0
+; GFX9-NEXT: s_not_b64 exec, exec
+; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-O0-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0
+; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0
+; GFX9-NEXT: s_not_b64 exec, exec
+ %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0)
+; GFX9-O0: buffer_store_dword v0
+; GFX9-O0: buffer_store_dword v1
+; GFX9: s_swappc_b64
+ %tmp134 = call i64 @called_i64(i64 %tmp107)
+; GFX9-O0: buffer_load_dword v3
+; GFX9-O0: buffer_load_dword v4
+ %tmp136 = add i64 %tmp134, %tmp107
+ %tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136)
+ %tmp138 = bitcast i64 %tmp137 to <2 x i32>
+ call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %tmp138, <4 x i32> %tmp14, i32 4, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_cs void @_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %index) {
+ %tmp17 = shl i32 %index, 5
+; GFX9: buffer_load_dwordx4
+ %tmp18 = tail call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %tmp17, i32 0)
+ %.i0.upto1.bc = bitcast <4 x i32> %tmp18 to <2 x i64>
+ %tmp19 = or i32 %tmp17, 16
+; GFX9: buffer_load_dwordx2
+ %tmp20 = tail call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %tmp19, i32 0)
+ %.i0.upto1.extract = extractelement <2 x i64> %.i0.upto1.bc, i32 0
+ %tmp22 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %.i0.upto1.extract, i64 9223372036854775807)
+ %tmp97 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp22)
+ %.i1.upto1.extract = extractelement <2 x i64> %.i0.upto1.bc, i32 1
+ %tmp99 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %.i1.upto1.extract, i64 9223372036854775807)
+ %tmp174 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp99)
+ %.i25 = bitcast <2 x i32> %tmp20 to i64
+ %tmp176 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %.i25, i64 9223372036854775807)
+ %tmp251 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp176)
+ %.cast = bitcast i64 %tmp97 to <2 x float>
+ %.cast6 = bitcast i64 %tmp174 to <2 x float>
+ %.cast7 = bitcast i64 %tmp251 to <2 x float>
+ %tmp254 = shufflevector <2 x float> %.cast, <2 x float> %.cast6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; GFX9: buffer_store_dwordx4
+ tail call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %tmp254, <4 x i32> %desc, i32 %tmp17, i32 0, i32 0)
+ ; GFX9: buffer_store_dwordx2
+ tail call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %.cast7, <4 x i32> %desc, i32 %tmp19, i32 0, i32 0)
+ ret void
+}
+
+declare i32 @llvm.amdgcn.wwm.i32(i32)
+declare i64 @llvm.amdgcn.wwm.i64(i64)
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
+declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64)
+declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
+declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32)
+declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32)
+declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32)
+declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32)
+declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32)
+declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32)
+declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
+declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
OpenPOWER on IntegriCloud