diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/basic-branch.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll | 46 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/spill-m0.ll | 21 |
4 files changed, 28 insertions, 77 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll index 617204fdf33..aba0b63a254 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -1,20 +1,16 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s -; If spilling to smem, additional registers are used for the resource -; descriptor. - -; ALL-LABEL: {{^}}max_14_sgprs: +; CHECK-LABEL: {{^}}max_14_sgprs: ; FIXME: Should be ablo to skip this copying of the private segment ; buffer because all the SGPR spills are to VGPRs. -; ALL: s_mov_b64 s[6:7], s[2:3] -; ALL: s_mov_b64 s[4:5], s[0:1] -; ALL: SGPRBlocks: 1 -; ALL: NumSGPRsForWavesPerEU: 14 -define void @max_14_sgprs(i32 addrspace(1)* %out1, +; CHECK: s_mov_b64 s[6:7], s[2:3] +; CHECK: s_mov_b64 s[4:5], s[0:1] +; CHECK: SGPRBlocks: 1 +; CHECK: NumSGPRsForWavesPerEU: 14 +define void @max_14_sgprs(i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, @@ -35,7 +31,7 @@ define void @max_14_sgprs(i32 addrspace(1)* %out1, ; --------------------- ; total: 14 -; + reserved vcc, xnack, flat_scratch = 20 +; + reserved vcc, flat_scratch = 18 ; Because we can't handle re-using the last few input registers as the ; special vcc etc. registers (as well as decide to not use the unused @@ -44,14 +40,14 @@ define void @max_14_sgprs(i32 addrspace(1)* %out1, ; ALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs: ; TOSGPR: SGPRBlocks: 2 -; TOSGPR: NumSGPRsForWavesPerEU: 20 +; TOSGPR: NumSGPRsForWavesPerEU: 18 ; TOSMEM: s_mov_b64 s[6:7], s[2:3] +; TOSMEM: s_mov_b32 s9, s13 ; TOSMEM: s_mov_b64 s[4:5], s[0:1] -; TOSMEM: s_mov_b32 s3, s13 ; TOSMEM: SGPRBlocks: 2 -; TOSMEM: NumSGPRsForWavesPerEU: 20 +; TOSMEM: NumSGPRsForWavesPerEU: 18 define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, @@ -83,12 +79,12 @@ define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, ; ; swapping the order the registers are copied from what normally ; ; happens. -; TOSMEM: s_mov_b32 s5, s11 -; TOSMEM: s_add_u32 m0, s5, -; TOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0 +; TOSMEM: s_mov_b64 s[6:7], s[2:3] +; TOSMEM: s_mov_b64 s[4:5], s[0:1] +; TOSMEM: s_mov_b32 s3, s11 -; ALL: SGPRBlocks: 2 -; ALL: NumSGPRsForWavesPerEU: 18 +; ALL: SGPRBlocks: 1 +; ALL: NumSGPRsForWavesPerEU: 16 define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, diff --git a/llvm/test/CodeGen/AMDGPU/basic-branch.ll b/llvm/test/CodeGen/AMDGPU/basic-branch.ll index 83313ed5327..7bc4d735feb 100644 --- a/llvm/test/CodeGen/AMDGPU/basic-branch.ll +++ b/llvm/test/CodeGen/AMDGPU/basic-branch.ll @@ -1,5 +1,5 @@ ; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s -; RUN: llc -O0 -march=amdgcn -mcpu=tonga -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s +; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll index f267eb47559..c3d9ee7f13f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -1,44 +1,14 @@ -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s -; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s ; Make sure this doesn't crash. -; ALL-LABEL: {{^}}test: -; ALL: s_mov_b32 s92, SCRATCH_RSRC_DWORD0 -; ALL: s_mov_b32 s91, s3 - +; CHECK: {{^}}test: ; Make sure we are handling hazards correctly. -; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12 -; SGPR-NEXT: s_waitcnt vmcnt(0) -; SGPR-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]] -; SGPR-NEXT: s_nop 4 -; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0 - - -; Make sure scratch wave offset register is correctly incremented and -; then restored. -; SMEM: s_mov_b32 m0, s91{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill -; SMEM: s_add_u32 m0, s91, 0x100{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill -; SMEM: s_add_u32 m0, s91, 0x200{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill -; SMEM: s_add_u32 m0, s91, 0x300{{$}} -; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill - - -; SMEM: s_mov_b32 m0, s91{{$}} -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload -; SMEM: s_add_u32 m0, s91, 0x100{{$}} -; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload -; SMEM: s_add_u32 m0, s91, 0x200{{$}} -; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload -; SMEM: s_add_u32 m0, s91, 0x300{{$}} -; SMEM: s_waitcnt lgkmcnt(0) -; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload - -; ALL: s_endpgm +; CHECK: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]] +; CHECK-NEXT: s_nop 4 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0 +; CHECK: s_endpgm define void @test(i32 addrspace(1)* %out, i32 %in) { call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll index c5ef75e5fb7..74e33d11bed 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -1,13 +1,12 @@ ; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s ; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s ; XXX - Why does it like to use vcc? ; GCN-LABEL: {{^}}spill_m0: -; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0 +; TOSMEM: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; GCN: s_cmp_lg_u32 @@ -17,13 +16,6 @@ ; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill ; TOVMEM: s_waitcnt vmcnt(0) - -; TOSMEM: s_mov_b32 vcc_hi, m0 -; TOSMEM: s_mov_b32 m0, s3{{$}} -; TOSMEM-NOT: vcc_hi -; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill -; TOSMEM: s_waitcnt lgkmcnt(0) - ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ENDIF]]: @@ -35,11 +27,6 @@ ; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]] ; TOVMEM: s_mov_b32 m0, vcc_hi -; TOSMEM: s_mov_b32 m0, s3{{$}} -; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload -; TOSMEM-NOT: vcc_hi -; TOSMEM: s_mov_b32 m0, vcc_hi - ; GCN: s_add_i32 m0, m0, 1 define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 { entry: @@ -61,8 +48,6 @@ endif: ; GCN-LABEL: {{^}}spill_m0_lds: ; GCN-NOT: v_readlane_b32 m0 -; GCN-NOT: s_buffer_store_dword m0 -; GCN-NOT: s_buffer_load_dword m0 define amdgpu_ps void @spill_m0_lds(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) #0 { main_body: %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3) |

