diff options
author | Matthias Braun <matze@braunis.de> | 2017-06-17 02:08:18 +0000 |
---|---|---|
committer | Matthias Braun <matze@braunis.de> | 2017-06-17 02:08:18 +0000 |
commit | 537d0391049745ea4b4d71c2aa63129d2ba6f55c (patch) | |
tree | dbbd7428612e286c648883f0d9c0b7acba75c328 /llvm/test/CodeGen/AMDGPU | |
parent | d123c194e014cb9ce3f42e8b4de93f2b6eb0a4d5 (diff) | |
download | bcm5719-llvm-537d0391049745ea4b4d71c2aa63129d2ba6f55c.tar.gz bcm5719-llvm-537d0391049745ea4b4d71c2aa63129d2ba6f55c.zip |
RegScavenging: Add scavengeRegisterBackwards()
Re-apply r276044/r279124/r305516. Fixed a problem where we would refuse
to place spills as the very first instruction of a basic block and thus
artificially increase pressure (test in
test/CodeGen/PowerPC/scavenging.mir:spill_at_begin)
This is a variant of scavengeRegister() that works for
enterBasicBlockEnd()/backward(). The benefit of the backward mode is
that it is not affected by incomplete kill flags.
This patch also changes
PrologEpilogInserter::doScavengeFrameVirtualRegs() to use the register
scavenger in backwards mode.
Differential Revision: http://reviews.llvm.org/D21885
llvm-svn: 305625
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
3 files changed, 50 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll index ac2f7b4a4a4..822ea803194 100644 --- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -39,44 +39,49 @@ define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1, ; features when the number of registers is frozen), this ends up using ; more than expected. -; ALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs: -; TOSGPR: SGPRBlocks: 1 -; TOSGPR: NumSGPRsForWavesPerEU: 16 +; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs: +; XTOSGPR: SGPRBlocks: 1 +; XTOSGPR: NumSGPRsForWavesPerEU: 16 -; TOSMEM: s_mov_b64 s[10:11], s[2:3] -; TOSMEM: s_mov_b64 s[8:9], s[0:1] -; TOSMEM: s_mov_b32 s7, s13 +; XTOSMEM: s_mov_b64 s[10:11], s[2:3] +; XTOSMEM: s_mov_b64 s[8:9], s[0:1] +; XTOSMEM: s_mov_b32 s7, s13 -; TOSMEM: SGPRBlocks: 1 -; TOSMEM: NumSGPRsForWavesPerEU: 16 -define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, - i32 addrspace(1)* %out2, - i32 addrspace(1)* %out3, - i32 addrspace(1)* %out4, - i32 %one, i32 %two, i32 %three, i32 %four) #2 { - %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() - %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() - %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() - %x.3 = call i64 @llvm.amdgcn.dispatch.id() - %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() - %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() - store volatile i32 0, i32* undef - br label %stores - -stores: - store volatile i32 %x.0, i32 addrspace(1)* undef - store volatile i32 %x.0, i32 addrspace(1)* undef - store volatile i32 %x.0, i32 addrspace(1)* undef - store volatile i64 %x.3, i64 addrspace(1)* undef - store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef - store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef - - store i32 %one, i32 addrspace(1)* %out1 - store i32 %two, i32 addrspace(1)* %out2 - store i32 %three, i32 addrspace(1)* 
%out3 - store i32 %four, i32 addrspace(1)* %out4 - ret void -} +; XTOSMEM: SGPRBlocks: 1 +; XTOSMEM: NumSGPRsForWavesPerEU: 16 +; +; This test case is disabled: When calculating the spillslot addresses AMDGPU +; creates an extra vreg to save/restore m0 which in a point of maximum register +; pressure would trigger an endless loop; the compiler aborts earlier with +; "Incomplete scavenging after 2nd pass" in practice. +;define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, +; i32 addrspace(1)* %out2, +; i32 addrspace(1)* %out3, +; i32 addrspace(1)* %out4, +; i32 %one, i32 %two, i32 %three, i32 %four) #2 { +; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() +; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() +; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() +; %x.3 = call i64 @llvm.amdgcn.dispatch.id() +; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() +; %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() +; store volatile i32 0, i32* undef +; br label %stores +; +;stores: +; store volatile i32 %x.0, i32 addrspace(1)* undef +; store volatile i32 %x.0, i32 addrspace(1)* undef +; store volatile i32 %x.0, i32 addrspace(1)* undef +; store volatile i64 %x.3, i64 addrspace(1)* undef +; store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef +; store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef +; +; store i32 %one, i32 addrspace(1)* %out1 +; store i32 %two, i32 addrspace(1)* %out2 +; store i32 %three, i32 addrspace(1)* %out3 +; store i32 %four, i32 addrspace(1)* %out4 +; ret void +;} ; The following test is commented out for now; http://llvm.org/PR31230 ; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll b/llvm/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll index 0796c24b331..0ffc9220315 100644 --- a/llvm/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll +++ 
b/llvm/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll @@ -12,8 +12,8 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK: DebugProps: ; CHECK: DebuggerABIVersion: [ 1, 0 ] ; CHECK: ReservedNumVGPRs: 4 -; GFX700: ReservedFirstVGPR: 11 -; GFX800: ReservedFirstVGPR: 11 +; GFX700: ReservedFirstVGPR: 8 +; GFX800: ReservedFirstVGPR: 8 ; GFX9: ReservedFirstVGPR: 14 ; CHECK: PrivateSegmentBufferSGPR: 0 ; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll index d67988b4632..eab73b90130 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -22,9 +22,9 @@ define void @func_mov_fi_i32() #0 { ; GCN-LABEL: {{^}}func_add_constant_to_fi_i32: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN: s_sub_u32 s6, s5, s4 -; GCN-NEXT: s_lshr_b32 s6, s6, 6 -; GCN-NEXT: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, s6, 4 +; GCN: s_sub_u32 vcc_hi, s5, s4 +; GCN-NEXT: s_lshr_b32 vcc_hi, vcc_hi, 6 +; GCN-NEXT: v_add_i32_e64 v0, {{s\[[0-9]+:[0-9]+\]|vcc}}, vcc_hi, 4 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0 ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 @@ -71,8 +71,8 @@ define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 { ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr: ; GCN: s_waitcnt -; GCN-NEXT: s_sub_u32 s6, s5, s4 -; GCN-NEXT: v_lshr_b32_e64 v0, s6, 6 +; GCN-NEXT: s_sub_u32 vcc_hi, s5, s4 +; GCN-NEXT: v_lshr_b32_e64 v0, vcc_hi, 6 ; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0 ; GCN-NOT: v_mov ; GCN: ds_write_b32 v0, v0 @@ -99,8 +99,8 @@ define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) # } ; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: -; GCN: s_sub_u32 s8, s5, s4 -; GCN: v_lshr_b32_e64 v1, s8, 6 +; GCN: s_sub_u32 vcc_hi, s5, s4 +; GCN: v_lshr_b32_e64 v1, vcc_hi, 6 ; GCN: s_and_saveexec_b64 ; GCN: v_add_i32_e32 v0, vcc, 4, 
v1 |