diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-11-07 21:53:43 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-11-07 21:53:43 +0000 |
commit | bc233f5523f43de34788179a11e390aaa69c8209 (patch) | |
tree | 8521369469d3c3e2a52afe36233671a4cfb8d1c8 /llvm/test/CodeGen/AMDGPU/smrd.ll | |
parent | 61396ff67c05863e63264431d209c6a91ff7dc53 (diff) | |
download | bcm5719-llvm-bc233f5523f43de34788179a11e390aaa69c8209.tar.gz bcm5719-llvm-bc233f5523f43de34788179a11e390aaa69c8209.zip |
Revert "AMDGPU: Divergence-driven selection of scalar buffer load intrinsics"
This reverts commit r344696 for now (except for some test additions).
See https://bugs.freedesktop.org/show_bug.cgi?id=108611.
llvm-svn: 346364
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/smrd.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/smrd.ll | 50 |
1 files changed, 20 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index f453cfdbd1f..c87145a1a5b 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -292,19 +292,18 @@ main_body: ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm: ; GCN-NEXT: %bb. -; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4092 ; +; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ; define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: - %off = add i32 %offset, 4092 + %off = add i32 %offset, 4095 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off) ret float %r } ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large: ; GCN-NEXT: %bb. -; SICI-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0 -; SICI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; -; VIGFX9-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 4 offen offset:4092 ; +; GCN-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0 +; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: %off = add i32 %offset, 4096 @@ -511,15 +510,12 @@ main_body: } ; GCN-LABEL: {{^}}smrd_load_nonconst4: -; SICI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xff8, v0 ; -; SICI-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; -; SICI-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; -; SICI-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; -; SICI-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 56 offen offset:4032 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 56 offen offset:4048 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 56 offen offset:4064 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 56 offen offset:4080 ; +; SICIVI: v_add_{{i32|u32}}_e32 v{{[0-9]+}}, vcc, 0xff8, v0 ; +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0xff8, v0 ; +; GCN-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; +; GCN-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; +; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; +; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; ; GCN: ; return to shader part epilog define amdgpu_ps <16 x float> @smrd_load_nonconst4(<4 x i32> inreg %rsrc, i32 %off) #0 { main_body: @@ -530,16 +526,12 @@ main_body: } ; GCN-LABEL: {{^}}smrd_load_nonconst5: -; SICI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x1004, v0 -; SICI-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; -; SICI-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; -; SICI-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; -; SICI-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; -; VIGFX9: s_movk_i32 s4, 0xfc0 -; VIGFX9-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], s4 offen offset:68 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], s4 offen offset:84 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], s4 offen offset:100 ; -; VIGFX9-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], s4 offen offset:116 ; +; SICIVI: v_add_{{i32|u32}}_e32 v{{[0-9]+}}, vcc, 0x1004, v0 +; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x1004, v0 +; GCN-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; +; GCN-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; +; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; +; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; ; GCN: ; return to shader part epilog define amdgpu_ps <16 x float> @smrd_load_nonconst5(<4 x i32> inreg %rsrc, i32 %off) #0 { main_body: @@ -567,10 +559,9 @@ main_body: ; GCN-LABEL: {{^}}smrd_uniform_loop: ; -; TODO: we should keep the loop counter in an SGPR +; TODO: this should use an s_buffer_load ; -; GCN: v_readfirstlane_b32 -; GCN: s_buffer_load_dword +; GCN: buffer_load_dword define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 { main_body: br label %loop @@ -594,10 +585,9 @@ exit: ; (this test differs from smrd_uniform_loop by the more complex structure of phis, ; which used to confuse the DivergenceAnalysis after structurization) ; -; TODO: we should keep the loop counter in an SGPR +; TODO: we should keep the loop counter in an SGPR and use an S_BUFFER_LOAD ; -; GCN: v_readfirstlane_b32 -; GCN: s_buffer_load_dword +; GCN: buffer_load_dword define amdgpu_ps float @smrd_uniform_loop2(<4 x i32> inreg %desc, i32 %bound, i32 %bound.a) #0 { main_body: br label %loop |