summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/smrd.ll
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2018-11-07 21:53:43 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2018-11-07 21:53:43 +0000
commitbc233f5523f43de34788179a11e390aaa69c8209 (patch)
tree8521369469d3c3e2a52afe36233671a4cfb8d1c8 /llvm/test/CodeGen/AMDGPU/smrd.ll
parent61396ff67c05863e63264431d209c6a91ff7dc53 (diff)
downloadbcm5719-llvm-bc233f5523f43de34788179a11e390aaa69c8209.tar.gz
bcm5719-llvm-bc233f5523f43de34788179a11e390aaa69c8209.zip
Revert "AMDGPU: Divergence-driven selection of scalar buffer load intrinsics"
This reverts commit r344696 for now (except for some test additions). See https://bugs.freedesktop.org/show_bug.cgi?id=108611. llvm-svn: 346364
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/smrd.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/smrd.ll50
1 files changed, 20 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index f453cfdbd1f..c87145a1a5b 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -292,19 +292,18 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.
-; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4092 ;
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
- %off = add i32 %offset, 4092
+ %off = add i32 %offset, 4095
%r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
ret float %r
}
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
; GCN-NEXT: %bb.
-; SICI-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0
-; SICI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
-; VIGFX9-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 4 offen offset:4092 ;
+; GCN-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
%off = add i32 %offset, 4096
@@ -511,15 +510,12 @@ main_body:
}
; GCN-LABEL: {{^}}smrd_load_nonconst4:
-; SICI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xff8, v0 ;
-; SICI-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ;
-; SICI-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ;
-; SICI-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ;
-; SICI-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 56 offen offset:4032 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 56 offen offset:4048 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 56 offen offset:4064 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 56 offen offset:4080 ;
+; SICIVI: v_add_{{i32|u32}}_e32 v{{[0-9]+}}, vcc, 0xff8, v0 ;
+; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0xff8, v0 ;
+; GCN-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ;
+; GCN-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ;
+; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ;
+; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ;
; GCN: ; return to shader part epilog
define amdgpu_ps <16 x float> @smrd_load_nonconst4(<4 x i32> inreg %rsrc, i32 %off) #0 {
main_body:
@@ -530,16 +526,12 @@ main_body:
}
; GCN-LABEL: {{^}}smrd_load_nonconst5:
-; SICI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x1004, v0
-; SICI-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ;
-; SICI-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ;
-; SICI-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ;
-; SICI-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ;
-; VIGFX9: s_movk_i32 s4, 0xfc0
-; VIGFX9-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], s4 offen offset:68 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], s4 offen offset:84 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], s4 offen offset:100 ;
-; VIGFX9-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], s4 offen offset:116 ;
+; SICIVI: v_add_{{i32|u32}}_e32 v{{[0-9]+}}, vcc, 0x1004, v0
+; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x1004, v0
+; GCN-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ;
+; GCN-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ;
+; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ;
+; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ;
; GCN: ; return to shader part epilog
define amdgpu_ps <16 x float> @smrd_load_nonconst5(<4 x i32> inreg %rsrc, i32 %off) #0 {
main_body:
@@ -567,10 +559,9 @@ main_body:
; GCN-LABEL: {{^}}smrd_uniform_loop:
;
-; TODO: we should keep the loop counter in an SGPR
+; TODO: this should use an s_buffer_load
;
-; GCN: v_readfirstlane_b32
-; GCN: s_buffer_load_dword
+; GCN: buffer_load_dword
define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 {
main_body:
br label %loop
@@ -594,10 +585,9 @@ exit:
; (this test differs from smrd_uniform_loop by the more complex structure of phis,
; which used to confuse the DivergenceAnalysis after structurization)
;
-; TODO: we should keep the loop counter in an SGPR
+; TODO: we should keep the loop counter in an SGPR and use an S_BUFFER_LOAD
;
-; GCN: v_readfirstlane_b32
-; GCN: s_buffer_load_dword
+; GCN: buffer_load_dword
define amdgpu_ps float @smrd_uniform_loop2(<4 x i32> inreg %desc, i32 %bound, i32 %bound.a) #0 {
main_body:
br label %loop
OpenPOWER on IntegriCloud