summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll41
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll18
3 files changed, 60 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index d8cf67af7b0..e967723384b 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -1,11 +1,11 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; GCN-LABEL: {{^}}adjust_writemask_crash_0:
+; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain:
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
-define amdgpu_ps void @adjust_writemask_crash_0() #0 {
+define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
@@ -16,12 +16,12 @@ main_body:
ret void
}
-; GCN-LABEL: {{^}}adjust_writemask_crash_1:
+; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain:
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
-define amdgpu_ps void @adjust_writemask_crash_1() #0 {
+define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
@@ -32,6 +32,38 @@ main_body:
ret void
}
+; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain:
+; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
+; GCN-NOT: v1
+; GCN-NOT: v0
+; GCN: buffer_store_dword v0
+define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
+main_body:
+ %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 0
+ store volatile float %tmp4, float addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain:
+; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
+; GCN-NOT: v1
+; GCN-NOT: v0
+; GCN: buffer_store_dword v0
+define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
+main_body:
+ %tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
+ %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 1
+ store volatile float %tmp4, float addrspace(1)* undef
+ ret void
+}
+
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
@@ -44,6 +76,7 @@ main_body:
}
+declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll
index 2e78e2a4c6f..dfe4aff7bc1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.ll
@@ -3,6 +3,8 @@
; GCN-LABEL: {{^}}getlod:
; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da
+; GCN: s_waitcnt vmcnt(0)
+; GCN: store_dwordx4
define amdgpu_kernel void @getlod(<4 x float> addrspace(1)* %out) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.f32.v8i32(float undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1)
@@ -12,6 +14,8 @@ main_body:
; GCN-LABEL: {{^}}getlod_v2:
; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da
+; GCN: s_waitcnt vmcnt(0)
+; GCN: store_dwordx4
define amdgpu_kernel void @getlod_v2(<4 x float> addrspace(1)* %out) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1)
@@ -21,6 +25,8 @@ main_body:
; GCN-LABEL: {{^}}getlod_v4:
; GCN: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf da
+; GCN: s_waitcnt vmcnt(0)
+; GCN: store_dwordx4
define amdgpu_kernel void @getlod_v4(<4 x float> addrspace(1)* %out) {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 0, i1 0, i1 0, i1 0, i1 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
index 42c87056746..d9be4a4d019 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
@@ -129,6 +129,8 @@ main_body:
; GCN-LABEL: {{^}}getresinfo:
; GCN-NOT: s_waitcnt
; GCN: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} dmask:0xf
+; GCN: s_waitcnt vmcnt(0)
+; GCN: exp
define amdgpu_ps void @getresinfo() #0 {
main_body:
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
@@ -140,6 +142,19 @@ main_body:
ret void
}
+; GCN-LABEL: {{^}}getresinfo_dmask0:
+; GCN-NOT: image_get_resinfo
+define amdgpu_ps void @getresinfo_dmask0() #0 {
+main_body:
+ %r = call <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32 undef, <8 x i32> undef, i32 0, i1 false, i1 false, i1 false, i1 false)
+ %r0 = extractelement <4 x float> %r, i32 0
+ %r1 = extractelement <4 x float> %r, i32 1
+ %r2 = extractelement <4 x float> %r, i32 2
+ %r3 = extractelement <4 x float> %r, i32 3
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r0, float %r1, float %r2, float %r3, i1 true, i1 true) #0
+ ret void
+}
+
; Ideally, the register allocator would avoid the wait here
;
; GCN-LABEL: {{^}}image_store_wait:
@@ -186,9 +201,10 @@ declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32,
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.getresinfo.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #2
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
+attributes #2 = { nounwind readnone }
OpenPOWER on IntegriCloud