author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-23 21:49:25 +0000
---|---|---
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-23 21:49:25 +0000
commit | f43c2a0b4967626c7255940a7317ec4c8c9e9319 (patch)
tree | f27a5a2c8fa22e8ff0e614efbe33c98c748f3c26 /llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
parent | 94710840fb2e1a16a75124593314b60bf13a0a3a (diff)
AMDGPU: Insert moves of frame index to value operands
Strengthen the tests for storing frame indices.
Right now this only creates incidental scheduling changes.

We don't want an instruction to have multiple frame index
operands. The LocalStackSlotAllocation pass appears to make
various assumptions, at a minimum that the same frame index
will not appear twice on a single instruction.

There's no reason to allow this to happen, and it makes it
easy to introduce bugs where an immediate offset is applied
to the storing instruction when it should really be applied
to the stored value as a separate add.

This might not be sufficient. An add fi, fi situation could
still be problematic, but that is even less likely to occur
in real code.
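
For context, a minimal sketch of the transform, not taken from
the commit itself; the opcodes, operand lists, and virtual
register names below are simplified, MIR-style pseudocode:

    ; Before: the stored value and the store address are both
    ; frame indices, so one instruction carries two frame index
    ; operands.
    BUFFER_STORE_DWORD %stack.1, %stack.2, ...

    ; After: the frame index used as the value operand is first
    ; materialized into a VGPR, leaving a single frame index
    ; (the address) on the store itself.
    %tmp = V_MOV_B32_e32 %stack.1
    BUFFER_STORE_DWORD %tmp, %stack.2, ...

This is the pattern the stored_fi_to_fi test below checks for:
each frame-index offset (4 and 8) is moved with a v_mov_b32
before feeding the buffer_store_dword.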
llvm-svn: 264200
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/captured-frame-index.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/captured-frame-index.ll | 119
1 file changed, 119 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
new file mode 100644
index 00000000000..6b70c931feb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -0,0 +1,119 @@
+; RUN: llc -march=amdgcn -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}stored_fi_to_lds:
+; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
+; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
+
+; GCN: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
+
+; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
+define void @stored_fi_to_lds(float* addrspace(3)* %ptr) #0 {
+  %tmp = alloca float
+  store float 4.0, float *%tmp
+  store float* %tmp, float* addrspace(3)* %ptr
+  ret void
+}
+
+; Offset is applied
+; GCN-LABEL: {{^}}stored_fi_to_lds_2_small_objects:
+; GCN: s_load_dword [[LDSPTR:s[0-9]+]]
+; GCN: v_mov_b32_e32 [[ZERO0:v[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 [[ZERO1:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[ZERO1]]
+
+; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
+; GCN: buffer_store_dword v{{[0-9]+}}, [[FI1]]
+
+
+; GCN-DAG: v_mov_b32_e32 [[VLDSPTR:v[0-9]+]], [[LDSPTR]]
+; GCN: ds_write_b32 [[VLDSPTR]], [[ZERO0]]
+
+; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
+; GCN: ds_write_b32 [[VLDSPTR]], [[FI1]]
+define void @stored_fi_to_lds_2_small_objects(float* addrspace(3)* %ptr) #0 {
+  %tmp0 = alloca float
+  %tmp1 = alloca float
+  store float 4.0, float *%tmp0
+  store float 4.0, float *%tmp1
+  store volatile float* %tmp0, float* addrspace(3)* %ptr
+  store volatile float* %tmp1, float* addrspace(3)* %ptr
+  ret void
+}
+
+; Same frame index is used multiple times in the store
+; GCN-LABEL: {{^}}stored_fi_to_self:
+define void @stored_fi_to_self() #0 {
+  %tmp = alloca i32*
+
+  ; Avoid optimizing everything out
+  store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp
+  %bitcast = bitcast i32** %tmp to i32*
+  store volatile i32* %bitcast, i32** %tmp
+  ret void
+}
+
+; GCN-LABEL: {{^}}stored_fi_to_fi:
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+
+; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
+; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
+; GCN: buffer_store_dword [[FI1]], [[FI2]]
+
+; GCN-DAG: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
+; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
+; GCN: buffer_store_dword [[FI2]], [[FI1]]
+define void @stored_fi_to_fi() #0 {
+  %tmp0 = alloca i32*
+  %tmp1 = alloca i32*
+  %tmp2 = alloca i32*
+  store volatile i32* inttoptr (i32 1234 to i32*), i32** %tmp0
+  store volatile i32* inttoptr (i32 5678 to i32*), i32** %tmp1
+  store volatile i32* inttoptr (i32 9999 to i32*), i32** %tmp2
+
+  %bitcast1 = bitcast i32** %tmp1 to i32*
+  %bitcast2 = bitcast i32** %tmp2 to i32* ; at offset 8
+
+  store volatile i32* %bitcast1, i32** %tmp2 ; store offset 4 at offset 8
+  store volatile i32* %bitcast2, i32** %tmp1 ; store offset 8 at offset 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}stored_fi_to_global:
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_dword [[FI]]
+define void @stored_fi_to_global(float* addrspace(1)* %ptr) #0 {
+  %tmp = alloca float
+  store float 0.0, float *%tmp
+  store float* %tmp, float* addrspace(1)* %ptr
+  ret void
+}
+
+; Offset is applied
+; GCN-LABEL: {{^}}stored_fi_to_global_2_small_objects:
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
+
+; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
+; GCN: buffer_store_dword [[FI1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+
+; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
+; GCN: buffer_store_dword [[FI2]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
+  %tmp0 = alloca float
+  %tmp1 = alloca float
+  %tmp2 = alloca float
+  store volatile float 0.0, float *%tmp0
+  store volatile float 0.0, float *%tmp1
+  store volatile float 0.0, float *%tmp2
+  store volatile float* %tmp1, float* addrspace(1)* %ptr
+  store volatile float* %tmp2, float* addrspace(1)* %ptr
+  ret void
+}
+
+attributes #0 = { nounwind }