[MachineCopyPropagation] Extend pass to do COPY source forwarding

Summary: This change extends MachineCopyPropagation to do COPY source forwarding and adds an additional run of the pass to the default pass pipeline just after register allocation. This version of this patch uses the newly added MachineOperand::isRenamable bit to avoid forwarding registers is such a way as to violate constraints that aren't captured in the Machine IR (e.g. ABI or ISA constraints). This change is a continuation of the work started in D30751. Reviewers: qcolombet, javed.absar, MatzeB, jonpa, tstellar Subscribers: tpr, mgorny, mcrosier, nhaehnle, nemanjai, jyknight, hfinkel, arsenm, inouehrs, eraman, sdardis, guyblank, fedor.sergeev, aheejin, dschuff, jfb, myatsina, llvm-commits Differential Revision: https://reviews.llvm.org/D41835 llvm-svn: 323991
author: Geoff Berry <gberry@codeaurora.org> 2018-02-01 18:54:01 +0000
committer: Geoff Berry <gberry@codeaurora.org> 2018-02-01 18:54:01 +0000
commit: 94503c7bc3d70f51ab03c03b2067db3e973efa19 (patch)
tree: 8db2d28d7793aa60b2cde1e630798f3e3c3fd6e7 /llvm/test/CodeGen/AMDGPU
parent: a95bd9f72414a7d26f21a4ee5a0f40ff1d0c951a (diff)
download: bcm5719-llvm-94503c7bc3d70f51ab03c03b2067db3e973efa19.tar.gz
bcm5719-llvm-94503c7bc3d70f51ab03c03b2067db3e973efa19.zip
4 files changed, 16 insertions, 16 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 8082a095a08..343ba691af5 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -547,16 +547,16 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
 ; GCN: s_mov_b32 s5, s32
 ; GCN: s_add_u32 s32, s32, 0x300
 
-; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-9]+]], s14
-; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-9]+]], s15
-; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-9]+]], s16
+; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
+; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
+; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]
 
-; GCN-DAG: s_mov_b32 s6, [[SAVE_X]]
-; GCN-DAG: s_mov_b32 s7, [[SAVE_Y]]
-; GCN-DAG: s_mov_b32 s8, [[SAVE_Z]]
+; GCN-DAG: s_mov_b32 s6, s14
+; GCN-DAG: s_mov_b32 s7, s15
+; GCN-DAG: s_mov_b32 s8, s16
 ; GCN: s_swappc_b64
 
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
index c53ff28b547..66a238dff82 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -start-after=greedy -disable-copyprop -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
 # Check that we first do all vector instructions and only then change exec
 # CHECK-DAG:  COPY $vgpr10_vgpr11
 # CHECK-DAG:  COPY $vgpr12_vgpr13
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 8cc02d49709..5917f87e19a 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -78,7 +78,7 @@ ENDIF:                                            ; preds = %LOOP
 
 ; Uses a copy intsead of an or
 ; GCN: s_mov_b64 [[COPY:s\[[0-9]+:[0-9]+\]]], [[BREAK_REG]]
-; GCN: s_or_b64 [[BREAK_REG]], exec, [[COPY]]
+; GCN: s_or_b64 [[BREAK_REG]], exec, [[BREAK_REG]]
 define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index 831c71dff79..2d673a9b0cd 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -2,10 +2,10 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}vgpr:
-; GCN: v_mov_b32_e32 v1, v0
-; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
+; GCN-DAG: v_mov_b32_e32 v1, v0
+; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
 ; GCN: s_waitcnt expcnt(0)
+; GCN: v_add_f32_e32 v0, 1.0, v0
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 bb:
@@ -204,13 +204,13 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}both:
-; GCN: v_mov_b32_e32 v1, v0
-; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
-; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: s_add_i32 s0, s3, 2
+; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
+; GCN-DAG: v_mov_b32_e32 v1, v0
 ; GCN-DAG: s_mov_b32 s1, s2
-; GCN: s_mov_b32 s2, s3
 ; GCN: s_waitcnt expcnt(0)
+; GCN: v_add_f32_e32 v0, 1.0, v0
+; GCN-DAG: s_add_i32 s0, s3, 2
+; GCN-DAG: s_mov_b32 s2, s3
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 bb:
author	Geoff Berry <gberry@codeaurora.org>	2018-02-01 18:54:01 +0000
committer	Geoff Berry <gberry@codeaurora.org>	2018-02-01 18:54:01 +0000
commit	94503c7bc3d70f51ab03c03b2067db3e973efa19 (patch)
tree	8db2d28d7793aa60b2cde1e630798f3e3c3fd6e7 /llvm/test/CodeGen/AMDGPU
parent	a95bd9f72414a7d26f21a4ee5a0f40ff1d0c951a (diff)
download	bcm5719-llvm-94503c7bc3d70f51ab03c03b2067db3e973efa19.tar.gz bcm5719-llvm-94503c7bc3d70f51ab03c03b2067db3e973efa19.zip