diff options
author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-02-17 10:00:28 +0000 |
---|---|---|
committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-02-17 10:00:28 +0000 |
commit | b51a9bc35854509b9d4b125d232c4ff07ab765f5 (patch) | |
tree | cf3bacc75f84c4a5a98d34a787a49d1bdfccbeb1 | |
parent | b0e4f87fb073d517460fa562d31d94af28ce8bd0 (diff) | |
download | bcm5719-llvm-b51a9bc35854509b9d4b125d232c4ff07ab765f5.tar.gz bcm5719-llvm-b51a9bc35854509b9d4b125d232c4ff07ab765f5.zip |
[AMDGPU] Return true in enableMultipleCopyHints().
Enable multiple COPY hints to eliminate more COPYs during register allocation.
Note that this is something all targets should do; see
https://reviews.llvm.org/D38128.
Review: Stanislav Mekhanoshin, Tom Stellard.
llvm-svn: 325425
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ret.ll | 8 |
4 files changed, 14 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 00d1e1e8ddc..eb07ad69292 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -27,6 +27,8 @@ class TargetInstrInfo; struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { AMDGPURegisterInfo(); + bool enableMultipleCopyHints() const override { return true; } + /// \returns the sub reg enum value for the given \p Channel /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) unsigned getSubRegFromChannel(unsigned Channel) const; diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll index 8f9fa41df88..de61b24c702 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -208,8 +208,8 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 { ; GCN: enable_sgpr_workgroup_id_z = 0 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { @@ -223,8 +223,8 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { ; GCN: enable_sgpr_workgroup_id_z = 1 ; GCN: s_mov_b32 s33, s8 -; GCN: s_mov_b32 s4, s33 -; GCN: s_mov_b32 s6, s7 +; GCN-DAG: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() @@ -396,7 +396,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 { ; GCN-DAG: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN-DAG: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 @@ -412,7 +412,7 @@ define amdgpu_kernel void 
@kern_indirect_other_arg_use_workgroup_id_y() #1 { ; GCN: s_mov_b32 s33, s8 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN: s_mov_b32 s4, s33 +; GCN-DAG: s_mov_b32 s4, s33 ; GCN-DAG: s_mov_b32 s6, s7 ; GCN: s_mov_b32 s32, s33 diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index 13cb8b5f316..25c40dd0ada 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -220,8 +220,8 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 { ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; GCN: v_mov_b32_e32 v0, 0x22b -; GCN: v_mov_b32_e32 v1, v2 +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: v_mov_b32_e32 v1, v2 ; GCN: s_swappc_b64 ; GCN-NOT: v0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll index d587f6a3da2..c86cfa198db 100644 --- a/llvm/test/CodeGen/AMDGPU/ret.ll +++ b/llvm/test/CodeGen/AMDGPU/ret.ll @@ -126,9 +126,9 @@ bb: ; GCN-LABEL: {{^}}vgpr_ps_addr119: ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 -; GCN: v_mov_b32_e32 v2, v6 -; GCN: v_mov_b32_e32 v3, v8 -; GCN: v_mov_b32_e32 v4, v12 +; GCN-DAG: v_mov_b32_e32 v2, v6 +; GCN-DAG: v_mov_b32_e32 v3, v8 +; GCN-DAG: v_mov_b32_e32 v4, v12 ; GCN-NOT: s_endpgm define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { bb: @@ -178,8 +178,8 @@ bb: } ; GCN-LABEL: {{^}}sgpr: -; GCN: s_add_i32 s0, s3, 2 ; GCN: s_mov_b32 s2, s3 +; GCN: s_add_i32 s0, s2, 2 ; 
GCN-NOT: s_endpgm define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: |