diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ds_write2st64.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ds_write2st64.ll | 90 |
1 files changed, 59 insertions, 31 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll index a395af34b67..26e12d50172 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -1,12 +1,16 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s @lds = addrspace(3) global [512 x float] undef, align 4 -; SI-LABEL: @simple_write2st64_one_val_f32_0_1 -; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_one_val_f32_0_1 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0n + +; GCN-DAG: {{buffer|global}}_load_dword [[VAL:v[0-9]+]] +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -19,12 +23,20 @@ define amdgpu_kernel void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2st64_two_val_f32_2_5 -; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_two_val_f32_2_5 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 + +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4 + + +; GCN-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -40,12 +52,20 @@ define amdgpu_kernel void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* ret void } -; SI-LABEL: @simple_write2st64_two_val_max_offset_f32 -; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}} -; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_two_val_max_offset_f32 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 + +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, off offset:4 + +; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v{{[0-9]+}} +; GCN: v_add{{(_co)?}}_{{i|u}}32_e32 [[VPTR:v[0-9]+]], vcc, s{{[0-9]+}}, [[SHL]] +; GCN: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i @@ -60,12 +80,20 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f32(float addrsp ret void } -; SI-LABEL: @simple_write2st64_two_val_max_offset_f64 -; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 -; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]], -; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 -; SI: s_endpgm +; GCN-LABEL: @simple_write2st64_two_val_max_offset_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; CI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; CI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8 + +; GFX9-DAG: global_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off{{$}} +; GFX9-DAG: global_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, off offset:8 + +; GCN-DAG: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 3, v{{[0-9]+}} +; GCN: v_add_{{(co_)?}}{{i|u}}32_e32 [[VPTR:v[0-9]+]], vcc, s{{[0-9]+}}, [[SHL]] +; GCN: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 +; GCN: s_endpgm define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -81,10 +109,13 @@ define amdgpu_kernel void @simple_write2st64_two_val_max_offset_f64(double addrs ret void } -; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64 -; SI-NOT: ds_write2st64_b64 -; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 -; SI: s_endpgm +; GCN-LABEL: @byte_size_only_divisible_64_write2st64_f64 +; CI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-NOT: ds_write2st64_b64 +; GCN: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 +; GCN: s_endpgm define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i @@ -97,10 +128,7 @@ define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(double add ret void } -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() #1 - -; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind } |

