summary refs log tree commit diff stats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll 29
-rw-r--r-- llvm/test/CodeGen/AMDGPU/wqm.ll 47
2 files changed, 74 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
new file mode 100644
index 00000000000..648a90afbe5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; i32 case: expect a v_mov_b32 of the constant 42 between two "s_not_b64 exec, exec".
+; GCN-LABEL: {{^}}set_inactive:
+; GCN: s_not_b64 exec, exec
+; GCN: v_mov_b32_e32 {{v[0-9]+}}, 42
+; GCN: s_not_b64 exec, exec
+define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) {
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+ store i32 %tmp, i32 addrspace(1)* %out
+ ret void
+}
+; i64 case: expect two v_mov_b32 of 0 (the i64 0 second operand) between the s_not_b64 pair.
+; GCN-LABEL: {{^}}set_inactive_64:
+; GCN: s_not_b64 exec, exec
+; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0
+; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0
+; GCN: s_not_b64 exec, exec
+define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
+ %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
+ store i64 %tmp, i64 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
+declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
+
+attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 44bbeeba9f0..12fb6cd4d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -256,6 +256,47 @@ endif:
ret float %out.1
}
+; Check that @llvm.amdgcn.set.inactive disables WWM: the s_not_b64/v_mov_b32/s_not_b64
+; sequence must precede the s_or_saveexec_b64 ..., -1 and the v_add_i32 feeding @llvm.amdgcn.wwm.
+;CHECK-LABEL: {{^}}test_set_inactive1:
+;CHECK: buffer_load_dword
+;CHECK: s_not_b64 exec, exec
+;CHECK: v_mov_b32_e32
+;CHECK: s_not_b64 exec, exec
+;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
+;CHECK: v_add_i32_e32
+define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
+main_body:
+ %src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
+ %src.0 = bitcast float %src to i32
+ %src.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src.0, i32 0)
+ %out = add i32 %src.1, %src.1
+ %out.0 = call i32 @llvm.amdgcn.wwm.i32(i32 %out)
+ %out.1 = bitcast i32 %out.0 to float
+ call void @llvm.amdgcn.buffer.store.f32(float %out.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
+ ret void
+}
+
+; Check that enabling WQM anywhere enables WQM for the set.inactive source:
+; s_wqm_b64 exec, exec is expected before both buffer_load_dword instructions.
+;CHECK-LABEL: {{^}}test_set_inactive2:
+;CHECK: s_wqm_b64 exec, exec
+;CHECK: buffer_load_dword
+;CHECK: buffer_load_dword
+define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) {
+main_body:
+ %src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+ %src1.0 = bitcast float %src1 to i32
+ %src1.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src1.0, i32 undef)
+ %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
+ %src0.0 = bitcast float %src0 to i32
+ %src0.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %src0.0)
+ %out = add i32 %src0.1, %src1.1
+ %out.0 = bitcast i32 %out to float
+ call void @llvm.amdgcn.buffer.store.f32(float %out.0, <4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
+ ret void
+}
+
; Check a case of one branch of an if-else requiring WQM, the other requiring
; exact.
;
@@ -513,7 +554,7 @@ main_body:
; CHECK: s_wqm_b64 exec, exec
; CHECK: v_add_f32_e32 v0,
; CHECK: s_and_b64 exec, exec, [[ORIG]]
-define amdgpu_ps float @test_prolog_1(float %a, float %b) #4 {
+define amdgpu_ps float @test_prolog_1(float %a, float %b) #5 {
main_body:
%s = fadd float %a, %b
ret float %s
@@ -680,10 +721,12 @@ declare float @llvm.amdgcn.wqm.f32(float) #3
declare i32 @llvm.amdgcn.wqm.i32(i32) #3
declare float @llvm.amdgcn.wwm.f32(float) #3
declare i32 @llvm.amdgcn.wwm.i32(i32) #3
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #4
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind readnone }
-attributes #4 = { "amdgpu-ps-wqm-outputs" }
+attributes #4 = { nounwind readnone convergent }
+attributes #5 = { "amdgpu-ps-wqm-outputs" }
OpenPOWER on IntegriCloud