summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/sminmax.ll
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2016-04-12 21:18:10 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2016-04-12 21:18:10 +0000
commitdf77c9ada4c172def64f0a3514df6a34a223c409 (patch)
treee138e971963abf92c92ad8ea37836c317ed76e72 /llvm/test/CodeGen/AMDGPU/sminmax.ll
parentc86af3345c9e3d3814c73df644d58c11eba38922 (diff)
downloadbcm5719-llvm-df77c9ada4c172def64f0a3514df6a34a223c409.tar.gz
bcm5719-llvm-df77c9ada4c172def64f0a3514df6a34a223c409.zip
AMDGPU: add llvm.amdgcn.buffer.load/store intrinsics
Summary: They correspond to BUFFER_LOAD/STORE_DWORD[_X2,X3,X4] and mostly behave like llvm.amdgcn.buffer.load/store.format. They will be used by Mesa for SSBO and atomic counters at least when robust buffer access behavior is desired. (These instructions perform no format conversion and do buffer range checking per component.) As a side effect of sharing patterns with llvm.amdgcn.buffer.store.format, it has become trivial to add support for the f32 and v2f32 variants of that intrinsic, so the patch does so. Also DAG-ify (and fix) some tests that I noticed intermittent failures in while developing this patch. Some tests were (temporarily) adjusted for the required mayLoad/hasSideEffects changes to the BUFFER_STORE_DWORD* instructions. See also http://reviews.llvm.org/D18291. Reviewers: arsenm, tstellarAMD, mareko Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18292 llvm-svn: 266126
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/sminmax.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/sminmax.ll24
1 files changed, 12 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index e646605f7da..bea3ac15f55 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -46,11 +46,11 @@ define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind
}
; FUNC-LABEL: {{^}}v_abs_v2i32:
-; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
-; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
; GCN: v_add_i32
; GCN: v_add_i32
@@ -97,15 +97,15 @@ define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind
}
; FUNC-LABEL: {{^}}v_abs_v4i32:
-; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
-; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
-; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
-; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
+; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
+; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
-; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
; GCN: v_add_i32
; GCN: v_add_i32
OpenPOWER on IntegriCloud