diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/smrd.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/smrd.ll | 45 |
1 files changed, 27 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index a67d471ee19..d70801381ce 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -292,18 +292,19 @@ main_body: ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm: ; GCN-NEXT: %bb. -; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ; +; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4092 ; define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: - %off = add i32 %offset, 4095 + %off = add i32 %offset, 4092 %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off) ret float %r } ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large: ; GCN-NEXT: %bb. -; GCN-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0 -; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; +; SICI-NEXT: v_add_{{i|u}}32_e32 v0, {{(vcc, )?}}0x1000, v0 +; SICI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; +; VIGFX9-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 4 offen offset:4092 ; define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: %off = add i32 %offset, 4096 @@ -512,12 +513,15 @@ main_body: } ; GCN-LABEL: {{^}}smrd_load_nonconst4: -; SICIVI: v_add_{{i32|u32}}_e32 v{{[0-9]+}}, vcc, 0xff8, v0 ; -; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0xff8, v0 ; -; GCN-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; -; GCN-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; -; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; -; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; +; SICI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xff8, v0 ; +; SICI-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; +; SICI-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; +; SICI-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; +; SICI-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 56 offen offset:4032 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 56 offen offset:4048 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 56 offen offset:4064 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 56 offen offset:4080 ; ; GCN: ; return to shader part epilog define amdgpu_ps <16 x float> @smrd_load_nonconst4(<4 x i32> inreg %rsrc, i32 %off) #0 { main_body: @@ -528,12 +532,16 @@ main_body: } ; GCN-LABEL: {{^}}smrd_load_nonconst5: -; SICIVI: v_add_{{i32|u32}}_e32 v{{[0-9]+}}, vcc, 0x1004, v0 -; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x1004, v0 -; GCN-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; -; GCN-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; -; GCN-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; -; GCN-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; +; SICI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x1004, v0 +; SICI-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], 0 offen ; +; SICI-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], 0 offen offset:16 ; +; SICI-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], 0 offen offset:32 ; +; SICI-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], 0 offen offset:48 ; +; VIGFX9: s_movk_i32 s4, 0xfc0 +; VIGFX9-DAG: buffer_load_dwordx4 v[0:3], v{{[0-9]+}}, s[0:3], s4 offen offset:68 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[4:7], v{{[0-9]+}}, s[0:3], s4 offen offset:84 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[8:11], v{{[0-9]+}}, s[0:3], s4 offen offset:100 ; +; VIGFX9-DAG: buffer_load_dwordx4 v[12:15], v{{[0-9]+}}, s[0:3], s4 offen offset:116 ; ; GCN: ; return to shader part epilog define amdgpu_ps <16 x float> @smrd_load_nonconst5(<4 x i32> inreg %rsrc, i32 %off) #0 { main_body: @@ -561,9 +569,10 @@ main_body: ; GCN-LABEL: {{^}}smrd_uniform_loop: ; -; TODO: this should use an s_buffer_load +; TODO: we should keep the loop counter in an SGPR ; -; GCN: buffer_load_dword +; GCN: v_readfirstlane_b32 +; GCN: s_buffer_load_dword define amdgpu_ps float @smrd_uniform_loop(<4 x i32> inreg %desc, i32 %bound) #0 { main_body: br label %loop |