diff options
| author | Geoff Berry <gberry@codeaurora.org> | 2017-08-30 18:41:07 +0000 |
|---|---|---|
| committer | Geoff Berry <gberry@codeaurora.org> | 2017-08-30 18:41:07 +0000 |
| commit | feffb0c8af00b975d678a364fff19c4ffb8bfd61 (patch) | |
| tree | 18f81c042df484ad2b70a8e3592d8bf6b591625e /llvm/test/CodeGen/AMDGPU | |
| parent | 37b1d7b36fa144b0733dfc1659defe80cf384b15 (diff) | |
| download | bcm5719-llvm-feffb0c8af00b975d678a364fff19c4ffb8bfd61.tar.gz bcm5719-llvm-feffb0c8af00b975d678a364fff19c4ffb8bfd61.zip | |
Re-enable "[MachineCopyPropagation] Extend pass to do COPY source forwarding"
Issues identified by buildbots addressed since original review:
- Fixed ARMLoadStoreOptimizer bug exposed by this change in r311907.
- The pass no longer forwards COPYs to physical register uses, since
doing so can break code that implicitly relies on the physical
register number of the use.
- The pass no longer forwards COPYs to undef uses, since doing so
can break the machine verifier by creating LiveRanges that don't
end on a use (since the undef operand is not considered a use).
[MachineCopyPropagation] Extend pass to do COPY source forwarding
This change extends MachineCopyPropagation to do COPY source forwarding.
This change also extends the MachineCopyPropagation pass to be able to
be run during register allocation, after physical registers have been
assigned, but before the virtual registers have been re-written, which
allows it to remove virtual register COPY LiveIntervals that become dead
through the forwarding of all of their uses.
llvm-svn: 312154
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ret.ll | 16 |
2 files changed, 14 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll index 1518c0e503e..304178d3f7e 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -547,16 +547,16 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 { ; GCN: s_mov_b32 s5, s32 ; GCN: s_add_u32 s32, s32, 0x300 -; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-9]+]], s14 -; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-9]+]], s15 -; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-9]+]], s16 +; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14 +; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15 +; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7] ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9] ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11] -; GCN-DAG: s_mov_b32 s6, [[SAVE_X]] -; GCN-DAG: s_mov_b32 s7, [[SAVE_Y]] -; GCN-DAG: s_mov_b32 s8, [[SAVE_Z]] +; GCN-DAG: s_mov_b32 s6, s14 +; GCN-DAG: s_mov_b32 s7, s15 +; GCN-DAG: s_mov_b32 s8, s16 ; GCN: s_swappc_b64 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll index 831c71dff79..2d673a9b0cd 100644 --- a/llvm/test/CodeGen/AMDGPU/ret.ll +++ b/llvm/test/CodeGen/AMDGPU/ret.ll @@ -2,10 +2,10 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}vgpr: -; GCN: v_mov_b32_e32 v1, v0 -; GCN-DAG: v_add_f32_e32 v0, 1.0, v1 -; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm +; GCN-DAG: v_mov_b32_e32 v1, v0 +; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm ; GCN: s_waitcnt expcnt(0) +; GCN: v_add_f32_e32 v0, 1.0, v0 ; GCN-NOT: s_endpgm define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: @@ -204,13 +204,13 @@ bb: } ; GCN-LABEL: {{^}}both: -; GCN: v_mov_b32_e32 v1, v0 -; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm -; GCN-DAG: v_add_f32_e32 v0, 1.0, v1 -; GCN-DAG: s_add_i32 s0, s3, 2 +; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm +; GCN-DAG: v_mov_b32_e32 v1, v0 ; GCN-DAG: s_mov_b32 s1, s2 -; GCN: s_mov_b32 s2, s3 ; GCN: s_waitcnt expcnt(0) +; GCN: v_add_f32_e32 v0, 1.0, v0 +; GCN-DAG: s_add_i32 s0, s3, 2 +; GCN-DAG: s_mov_b32 s2, s3 ; GCN-NOT: s_endpgm define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: |

