4 files changed, 28 insertions, 77 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
index 617204fdf33..aba0b63a254 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -1,20 +1,16 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
 
-; If spilling to smem, additional registers are used for the resource
-; descriptor.
-
-; ALL-LABEL: {{^}}max_14_sgprs:
+; CHECK-LABEL: {{^}}max_14_sgprs:
 
 ; FIXME: Should be ablo to skip this copying of the private segment
 ; buffer because all the SGPR spills are to VGPRs.
 
-; ALL: s_mov_b64 s[6:7], s[2:3]
-; ALL: s_mov_b64 s[4:5], s[0:1]
-; ALL: SGPRBlocks: 1
-; ALL: NumSGPRsForWavesPerEU: 14
-define void @max_14_sgprs(i32 addrspace(1)* %out1,
+; CHECK: s_mov_b64 s[6:7], s[2:3]
+; CHECK: s_mov_b64 s[4:5], s[0:1]
 
+; CHECK: SGPRBlocks: 1
+; CHECK: NumSGPRsForWavesPerEU: 14
+define void @max_14_sgprs(i32 addrspace(1)* %out1,
                           i32 addrspace(1)* %out2,
                           i32 addrspace(1)* %out3,
                           i32 addrspace(1)* %out4,
@@ -35,7 +31,7 @@ define void @max_14_sgprs(i32 addrspace(1)* %out1,
 ; ---------------------
 ; total: 14
 
-; + reserved vcc, xnack, flat_scratch = 20
+; + reserved vcc, flat_scratch = 18
 
 ; Because we can't handle re-using the last few input registers as the
 ; special vcc etc. registers (as well as decide to not use the unused
@@ -44,14 +40,14 @@ define void @max_14_sgprs(i32 addrspace(1)* %out1,
 
 ; ALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
 ; TOSGPR: SGPRBlocks: 2
-; TOSGPR: NumSGPRsForWavesPerEU: 20
+; TOSGPR: NumSGPRsForWavesPerEU: 18
 
 ; TOSMEM: s_mov_b64 s[6:7], s[2:3]
+; TOSMEM: s_mov_b32 s9, s13
 ; TOSMEM: s_mov_b64 s[4:5], s[0:1]
-; TOSMEM: s_mov_b32 s3, s13
 
 ; TOSMEM: SGPRBlocks: 2
-; TOSMEM: NumSGPRsForWavesPerEU: 20
+; TOSMEM: NumSGPRsForWavesPerEU: 18
 define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
                                         i32 addrspace(1)* %out2,
                                         i32 addrspace(1)* %out3,
@@ -83,12 +79,12 @@ define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
 ; ; swapping the order the registers are copied from what normally
 ; ; happens.
 
-; TOSMEM: s_mov_b32 s5, s11
-; TOSMEM: s_add_u32 m0, s5,
-; TOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
+; TOSMEM: s_mov_b64 s[6:7], s[2:3]
+; TOSMEM: s_mov_b64 s[4:5], s[0:1]
+; TOSMEM: s_mov_b32 s3, s11
 
-; ALL: SGPRBlocks: 2
-; ALL: NumSGPRsForWavesPerEU: 18
+; ALL: SGPRBlocks: 1
+; ALL: NumSGPRsForWavesPerEU: 16
 define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
                                         i32 addrspace(1)* %out2,
                                         i32 addrspace(1)* %out3,
diff --git a/llvm/test/CodeGen/AMDGPU/basic-branch.ll b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
index 83313ed5327..7bc4d735feb 100644
--- a/llvm/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
-; RUN: llc -O0 -march=amdgcn -mcpu=tonga -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
 
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
index f267eb47559..c3d9ee7f13f 100644
--- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
@@ -1,44 +1,14 @@
-; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
-; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
 
 ; Make sure this doesn't crash.
-; ALL-LABEL: {{^}}test:
-; ALL: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
-; ALL: s_mov_b32 s91, s3
-
+; CHECK: {{^}}test:
 ; Make sure we are handling hazards correctly.
-; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
-; SGPR-NEXT: s_waitcnt vmcnt(0)
-; SGPR-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]]
-; SGPR-NEXT: s_nop 4
-; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
-
-
-; Make sure scratch wave offset register is correctly incremented and
-; then restored.
-; SMEM: s_mov_b32 m0, s91{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
-; SMEM: s_add_u32 m0, s91, 0x100{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
-; SMEM: s_add_u32 m0, s91, 0x200{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
-; SMEM: s_add_u32 m0, s91, 0x300{{$}}
-; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
-
-
-; SMEM: s_mov_b32 m0, s91{{$}}
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
-; SMEM: s_add_u32 m0, s91, 0x100{{$}}
-; SMEM: s_waitcnt lgkmcnt(0)
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
-; SMEM: s_add_u32 m0, s91, 0x200{{$}}
-; SMEM: s_waitcnt lgkmcnt(0)
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
-; SMEM: s_add_u32 m0, s91, 0x300{{$}}
-; SMEM: s_waitcnt lgkmcnt(0)
-; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
-
-; ALL: s_endpgm
+; CHECK: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]]
+; CHECK-NEXT: s_nop 4
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
+; CHECK: s_endpgm
 define void @test(i32 addrspace(1)* %out, i32 %in) {
   call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
   call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index c5ef75e5fb7..74e33d11bed 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -1,13 +1,12 @@
 ; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling  -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling  -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
 ; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
-; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s
+; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
 
 ; XXX - Why does it like to use vcc?
 
 ; GCN-LABEL: {{^}}spill_m0:
-; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0
+; TOSMEM: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
 
 ; GCN: s_cmp_lg_u32
 
@@ -17,13 +16,6 @@
 ; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
 ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
 ; TOVMEM: s_waitcnt vmcnt(0)
-
-; TOSMEM: s_mov_b32 vcc_hi, m0
-; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill
-; TOSMEM: s_waitcnt lgkmcnt(0)
-
 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
 
 ; GCN: [[ENDIF]]:
@@ -35,11 +27,6 @@
 ; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
 ; TOVMEM: s_mov_b32 m0, vcc_hi
 
-; TOSMEM: s_mov_b32 m0, s3{{$}}
-; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload
-; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_mov_b32 m0, vcc_hi
-
 ; GCN: s_add_i32 m0, m0, 1
 define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
 entry:
@@ -61,8 +48,6 @@ endif:
 
 ; GCN-LABEL: {{^}}spill_m0_lds:
 ; GCN-NOT: v_readlane_b32 m0
-; GCN-NOT: s_buffer_store_dword m0
-; GCN-NOT: s_buffer_load_dword m0
 define amdgpu_ps void @spill_m0_lds(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) #0 {
 main_body:
   %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)