summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2015-06-16 15:51:48 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2015-06-16 15:51:48 +0000
commit ed891b55616dd07f05c18db593e103f97faba520 (patch)
tree 7c3dda4725f03f730cb5bed4b4a873a931a005e3 /llvm/test/CodeGen/AMDGPU
parent fab40d3911d3052085220c616820e9761ed9e32f (diff)
download bcm5719-llvm-ed891b55616dd07f05c18db593e103f97faba520.tar.gz
bcm5719-llvm-ed891b55616dd07f05c18db593e103f97faba520.zip
Revert "Revert "Fix merges of non-zero vector stores""
Reapply r239539. Don't assume the collected number of stores is the same vector size. Just take the first N stores to fill the vector. llvm-svn: 239825
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/merge-stores.ll 101
1 files changed, 99 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index dbf9d4481ff..34a2fc7ffa7 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
@@ -89,7 +89,11 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
-; GCN: buffer_store_dwordx2
+; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0
+; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}}
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
@@ -99,7 +103,11 @@ define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0
}
; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
-; GCN: buffer_store_dwordx4
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x14d{{$}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x1c8{{$}}
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}}
+; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
@@ -530,6 +538,95 @@ define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
ret void
}
+; GCN-LABEL: {{^}}merge_global_store_5_constants_i32:
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 9{{$}}
+; GCN-DAG: v_mov_b32_e32 v[[HI4:[0-9]+]], -12{{$}}
+; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}}
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}}
+; GCN: buffer_store_dword v[[HI]]
+define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { ; five i32 constant stores to consecutive elements of %out; the checks above expect one dwordx4 store followed by one dword store
+ store i32 9, i32 addrspace(1)* %out, align 4 ; element 0: the constant matched into LO, the low register of the dwordx4 check
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 12, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 16, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 -12, i32 addrspace(1)* %idx3, align 4 ; element 3: the constant matched into HI4, the high register of the dwordx4 check
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 11, i32 addrspace(1)* %idx4, align 4 ; element 4: the fifth store; its constant is matched into HI and checked as a lone dword store
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_6_constants_i32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx2
+define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { ; six i32 constant stores to consecutive elements of %out; the checks above expect a dwordx4 store and then a dwordx2 store, without pinning registers or values
+ store i32 13, i32 addrspace(1)* %out, align 4 ; element 0
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 15, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 62, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 63, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 11, i32 addrspace(1)* %idx4, align 4 ; NOTE(review): elements 4 and 5 are presumably the pair covered by the dwordx2 check; the checks do not verify which elements land in which store
+ %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+ store i32 123, i32 addrspace(1)* %idx5, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_7_constants_i32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dword v
+define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { ; seven i32 constant stores to consecutive elements of %out; the checks above expect, in order, a dwordx4, a dwordx2 and a single dword store
+ store i32 34, i32 addrspace(1)* %out, align 4 ; element 0
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 999, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 65, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 33, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 98, i32 addrspace(1)* %idx4, align 4 ; NOTE(review): elements 4 and 5 are presumably the pair covered by the dwordx2 check; registers and values are not pinned by the checks
+ %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+ store i32 91, i32 addrspace(1)* %idx5, align 4
+ %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
+ store i32 212, i32 addrspace(1)* %idx6, align 4 ; element 6: the seventh store; presumably the one matched by the trailing single dword check
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
+; XGCN: buffer_store_dwordx4
+; XGCN: buffer_store_dwordx4

+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ; eight i32 constant stores to consecutive elements of %out; the active checks above expect eight separate dword stores. NOTE(review): the two XGCN-prefixed lines look like deliberately disabled checks for a pair of dwordx4 stores - confirm that no RUN line enables that prefix
+ store i32 34, i32 addrspace(1)* %out, align 4 ; element 0
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 999, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 65, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 33, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 98, i32 addrspace(1)* %idx4, align 4
+ %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+ store i32 91, i32 addrspace(1)* %idx5, align 4
+ %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
+ store i32 212, i32 addrspace(1)* %idx6, align 4
+ %idx7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7
+ store i32 999, i32 addrspace(1)* %idx7, align 4 ; element 7: stores the same constant (999) as element 1
+ ret void
+}
+
declare void @llvm.AMDGPU.barrier.local() #1
attributes #0 = { nounwind }
OpenPOWER on IntegriCloud