summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/ctlz.ll
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-01-11 17:02:00 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-01-11 17:02:00 +0000
commitde5fbe9c60d88308e5eabfe0360ba66863c09768 (patch)
treee4be0b70177eefabcf10f6b58e0d631f3ab7d410 /llvm/test/CodeGen/AMDGPU/ctlz.ll
parent94ad1b527f938452f496cf75289bd61cbdfa7875 (diff)
downloadbcm5719-llvm-de5fbe9c60d88308e5eabfe0360ba66863c09768.tar.gz
bcm5719-llvm-de5fbe9c60d88308e5eabfe0360ba66863c09768.zip
AMDGPU: Pattern match ffbh pattern to instruction.
The hardware instruction's output on 0 is -1 rather than 32. Eliminate a test and select to -1. This removes an extra instruction from the compatibility function with HSAIL's firstbit instruction. llvm-svn: 257352
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/ctlz.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/ctlz.ll59
1 files changed, 59 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index d0e0f621df1..fcff1b585e3 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -150,3 +150,62 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)*
store i32 %trunc, i32 addrspace(1)* %out.gep
ret void
}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { ; computes (val == 0) ? -1 : ctlz(val); the checks above expect the ffbh result to be stored directly
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone ; i1 false - a zero input has a defined result (the bit width, 32)
+ %cmp = icmp eq i32 %val, 0 ; tests the loaded input itself, not the ctlz result
+ %sel = select i1 %cmp, i32 -1, i32 %ctlz ; -1 for a zero input, otherwise the leading-zero count
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { ; computes (val != 0) ? ctlz(val) : -1; the checks above expect the ffbh result to be stored directly
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone ; i1 false - a zero input has a defined result (the bit width, 32)
+ %cmp = icmp ne i32 %val, 0 ; inverted predicate - true when the loaded input is non-zero
+ %sel = select i1 %cmp, i32 %ctlz, i32 -1 ; operands swapped to match the ne predicate, so a zero input still selects -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; TODO: Should be able to eliminate select here as well.
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { ; computes (ctlz(val) == 32) ? -1 : ctlz(val); the checks above still expect a v_cmp and a v_cndmask after the ffbh
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone ; i1 false - a zero input has a defined result (the bit width, 32)
+ %cmp = icmp eq i32 %ctlz, 32 ; compares the ctlz result against the bit width instead of comparing the input against 0
+ %sel = select i1 %cmp, i32 -1, i32 %ctlz ; -1 when ctlz returned 32, otherwise the leading-zero count
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { ; computes (ctlz(val) != 32) ? ctlz(val) : -1; the checks above still expect a v_cmp and a v_cndmask after the ffbh
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone ; i1 false - a zero input has a defined result (the bit width, 32)
+ %cmp = icmp ne i32 %ctlz, 32 ; inverted predicate on the ctlz result - true when the count is not the bit width
+ %sel = select i1 %cmp, i32 %ctlz, i32 -1 ; operands swapped to match the ne predicate, so a count of 32 still selects -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
OpenPOWER on IntegriCloud