AMDGPU: Do not combine loads/store across physreg defs

Summary: Since this pass operates on machine SSA form, this should only really affect M0 in practice.

Fixes various piglit variable-indexing/vs-varying-array-mat4-index-* tests.

Change-Id: Ib2a1dc3a8d7b08225a8da49a86f533faa0986aa8
Fixes: r317751 ("AMDGPU: Merge S_BUFFER_LOAD_DWORD_IMM into x2, x4")
Reviewers: arsenm, mareko, rampitec
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D40343
llvm-svn: 325677
author: Nicolai Haehnle <nhaehnle@gmail.com> 2018-02-21 13:31:35 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> 2018-02-21 13:31:35 +0000
commit: 770397f4cdcfbf2c0e0a9604a4d6065063197317 (patch)
tree: 8104868ad30833555c235eb51b2c376922d07ef4 /llvm/test/CodeGen/AMDGPU/smrd.ll
parent: d6e1a9404db84990eb428484c28ed978040561ef (diff)
download: bcm5719-llvm-770397f4cdcfbf2c0e0a9604a4d6065063197317.tar.gz
bcm5719-llvm-770397f4cdcfbf2c0e0a9604a4d6065063197317.zip
1 files changed, 45 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index a326942e43d..12190d52039 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -232,6 +232,48 @@ main_body:
   ret void
 }
 
+; GCN-LABEL: {{^}}smrd_imm_nomerge_m0:
+;
+; In principle we could merge the loads here as well, but it would require
+; careful tracking of physical registers since both v_interp* and v_movrel*
+; instructions (or gpr idx mode) use M0.
+;
+; GCN: s_buffer_load_dword
+; GCN: s_buffer_load_dword
+define amdgpu_ps float @smrd_imm_nomerge_m0(<4 x i32> inreg %desc, i32 inreg %prim, float %u, float %v) #0 {
+main_body:
+  %idx1.f = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 0)
+  %idx1 = bitcast float %idx1.f to i32
+  ; First interpolated vector; per the note above, v_interp* instructions use M0.
+  %v0.x1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 0, i32 0, i32 %prim)
+  %v0.x = call nsz float @llvm.amdgcn.interp.p2(float %v0.x1, float %v, i32 0, i32 0, i32 %prim)
+  %v0.y1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 0, i32 1, i32 %prim)
+  %v0.y = call nsz float @llvm.amdgcn.interp.p2(float %v0.y1, float %v, i32 0, i32 1, i32 %prim)
+  %v0.z1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 0, i32 2, i32 %prim)
+  %v0.z = call nsz float @llvm.amdgcn.interp.p2(float %v0.z1, float %v, i32 0, i32 2, i32 %prim)
+  %v0.tmp0 = insertelement <3 x float> undef, float %v0.x, i32 0
+  %v0.tmp1 = insertelement <3 x float> %v0.tmp0, float %v0.y, i32 1
+  %v0 = insertelement <3 x float> %v0.tmp1, float %v0.z, i32 2
+  %a = extractelement <3 x float> %v0, i32 %idx1
+  ; Second interpolated vector, assembled from its own %v1.* components.
+  %v1.x1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 1, i32 0, i32 %prim)
+  %v1.x = call nsz float @llvm.amdgcn.interp.p2(float %v1.x1, float %v, i32 1, i32 0, i32 %prim)
+  %v1.y1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 1, i32 1, i32 %prim)
+  %v1.y = call nsz float @llvm.amdgcn.interp.p2(float %v1.y1, float %v, i32 1, i32 1, i32 %prim)
+  %v1.z1 = call nsz float @llvm.amdgcn.interp.p1(float %u, i32 1, i32 2, i32 %prim)
+  %v1.z = call nsz float @llvm.amdgcn.interp.p2(float %v1.z1, float %v, i32 1, i32 2, i32 %prim)
+  %v1.tmp0 = insertelement <3 x float> undef, float %v1.x, i32 0
+  %v1.tmp1 = insertelement <3 x float> %v1.tmp0, float %v1.y, i32 1
+  %v1 = insertelement <3 x float> %v1.tmp1, float %v1.z, i32 2
+  ; Index %v1 with the first loaded value, then issue the second constant load.
+  %b = extractelement <3 x float> %v1, i32 %idx1
+  %c = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 4)
+  ; Sum all three results so that both loads and both extracts are used.
+  %res.tmp = fadd float %a, %b
+  %res = fadd float %res.tmp, %c
+  ret float %res
+}
+
 ; GCN-LABEL: {{^}}smrd_vgpr_merged:
 ; GCN-NEXT: %bb.
 ; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
@@ -289,8 +331,11 @@ ret_block:                                       ; preds = %.outer, %.label22, %
 
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readnone speculatable }
 
 !0 = !{}
author	Nicolai Haehnle <nhaehnle@gmail.com>	2018-02-21 13:31:35 +0000
committer	Nicolai Haehnle <nhaehnle@gmail.com>	2018-02-21 13:31:35 +0000
commit	770397f4cdcfbf2c0e0a9604a4d6065063197317 (patch)
tree	8104868ad30833555c235eb51b2c376922d07ef4 /llvm/test/CodeGen/AMDGPU/smrd.ll
parent	d6e1a9404db84990eb428484c28ed978040561ef (diff)
download	bcm5719-llvm-770397f4cdcfbf2c0e0a9604a4d6065063197317.tar.gz bcm5719-llvm-770397f4cdcfbf2c0e0a9604a4d6065063197317.zip