[AMDGPU] Combine and x, (sext cc from i1) => select cc, x, 0

Also factored out function to check if a boolean is an already deserialized value which does not require v_cndmask_b32 to be loaded. Added binary logical operators to its check. Differential Revision: https://reviews.llvm.org/D34500 llvm-svn: 306439
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2017-06-27 18:25:26 +0000
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2017-06-27 18:25:26 +0000
commit: 6851ddf9428d743a558d89e69cc6974f8d731d6a (patch)
tree: 61d1864a617e205139011ea9387eb1c6568979f5 /llvm/test
parent: 9a4ce0cc1c8bc6a69a9e275eb61cf08d7794cfc1 (diff)
download: bcm5719-llvm-6851ddf9428d743a558d89e69cc6974f8d731d6a.tar.gz
bcm5719-llvm-6851ddf9428d743a558d89e69cc6974f8d731d6a.zip
2 files changed, 47 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
new file mode 100644
index 00000000000..cd4ac4d58ad
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}and_i1_sext_bool:
+; GCN: v_cmp_{{gt|le}}_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e{{32|64}} [[VAL:v[0-9]+]], 0, v{{[0-9]+}}, [[CC]]
+; GCN: store_dword {{.*}}[[VAL]]
+; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}}
+; GCN-NOT: v_and_b32_e32
+
+define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp = icmp ugt i32 %x, %y
+  %ext = sext i1 %cmp to i32
+  %and = and i32 %v, %ext
+  store i32 %and, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 187fb24dfb6..9e47c7d3449 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -150,6 +150,26 @@ bb:
   ret void
 }
 
+; GCN-LABEL: {{^}}add_and:
+; GCN: s_and_b64 [[CC:[^,]+]],
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+
+define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
+bb:
+  %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+  %v = load i32, i32 addrspace(1)* %gep, align 4
+  %cmp1 = icmp ugt i32 %x, %y
+  %cmp2 = icmp ugt i32 %x, 1
+  %cmp = and i1 %cmp1, %cmp2
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %v, %ext
+  store i32 %add, i32 addrspace(1)* %gep, align 4
+  ret void
+}
+
 declare i1 @llvm.amdgcn.class.f32(float, i32) #0
 
 declare i32 @llvm.amdgcn.workitem.id.x() #0
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2017-06-27 18:25:26 +0000
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>	2017-06-27 18:25:26 +0000
commit	6851ddf9428d743a558d89e69cc6974f8d731d6a (patch)
tree	61d1864a617e205139011ea9387eb1c6568979f5 /llvm/test
parent	9a4ce0cc1c8bc6a69a9e275eb61cf08d7794cfc1 (diff)
download	bcm5719-llvm-6851ddf9428d743a558d89e69cc6974f8d731d6a.tar.gz bcm5719-llvm-6851ddf9428d743a558d89e69cc6974f8d731d6a.zip