summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-02-07 00:21:34 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-02-07 00:21:34 +0000
commit: a18b3bcf51870e177586d70b02565d4ca13ab83a (patch)
tree: 369b1f7504d7dc5bd7ee802fb8135e02841542ba /llvm/test/CodeGen/AMDGPU
parent: 258f059f88daf5ddb081c56c1daee71f2de81fad (diff)
download: bcm5719-llvm-a18b3bcf51870e177586d70b02565d4ca13ab83a.tar.gz
download: bcm5719-llvm-a18b3bcf51870e177586d70b02565d4ca13ab83a.zip
AMDGPU: Select BFI patterns with 64-bit ints
llvm-svn: 324431
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/bfi_int.ll 158
1 files changed, 146 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
index 7870e5f378d..1e74f06bbda 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 %s
-; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck --check-prefix=SI %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck -check-prefixes=R600,FUNC %s
; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
-; R600: {{^}}bfi_def:
+; FUNC-LABEL: {{^}}bfi_def:
; R600: BFI_INT
-; SI: @bfi_def
-; SI: v_bfi_b32
+
+; GCN: v_bfi_b32
define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %x, -1
@@ -21,10 +21,10 @@ entry:
; SHA-256 Ch function
; z ^ (x & (y ^ z))
-; R600: {{^}}bfi_sha256_ch:
+; FUNC-LABEL: {{^}}bfi_sha256_ch:
; R600: BFI_INT
-; SI: @bfi_sha256_ch
-; SI: v_bfi_b32
+
+; GCN: v_bfi_b32
define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = xor i32 %y, %z
@@ -36,12 +36,12 @@ entry:
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
-; R600: {{^}}bfi_sha256_ma:
+; FUNC-LABEL: {{^}}bfi_sha256_ma:
; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
-; SI: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
+; GCN: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; GCN: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
%0 = and i32 %x, %z
@@ -51,3 +51,137 @@ entry:
store i32 %3, i32 addrspace(1)* %out
ret void
}
+
+; <2 x i32> bitselect written as xor/and/xor: ((a ^ mask) & b) ^ mask.
+; Per bit this yields the bit of %a where %b is 1 and the bit of %mask
+; where %b is 0. The checks require two v_bfi_b32 instructions (one per
+; vector element) on the lines directly after an s_waitcnt, immediately
+; followed by s_setpc_b64.
+; FUNC-LABEL: {{^}}v_bitselect_v2i32_pat1:
+; GCN: s_waitcnt
+; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
+; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GCN-NEXT: s_setpc_b64
+define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
+ %xor.0 = xor <2 x i32> %a, %mask
+ %and = and <2 x i32> %xor.0, %b
+ %bitselect = xor <2 x i32> %and, %mask
+ ret <2 x i32> %bitselect
+}
+
+; i64 bitselect in the and/not/or form: (a & b) | (~a & mask).
+; Per bit this yields the bit of %b where %a is 1 and the bit of %mask
+; where %a is 0. The checks require the 64-bit operation to be emitted as
+; two v_bfi_b32 instructions on consecutive lines after an s_waitcnt,
+; immediately followed by s_setpc_b64.
+; FUNC-LABEL: {{^}}v_bitselect_i64_pat_0:
+; GCN: s_waitcnt
+; GCN-NEXT: v_bfi_b32 v1, v1, v3, v5
+; GCN-NEXT: v_bfi_b32 v0, v0, v2, v4
+; GCN-NEXT: s_setpc_b64
+define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
+ %and0 = and i64 %a, %b
+ %not.a = xor i64 %a, -1
+ %and1 = and i64 %not.a, %mask
+ %bitselect = or i64 %and0, %and1
+ ret i64 %bitselect
+}
+
+; i64 bitselect in the xor/and/xor form: ((a ^ mask) & b) ^ mask.
+; Per bit this yields the bit of %a where %b is 1 and the bit of %mask
+; where %b is 0. The checks require two v_bfi_b32 instructions, in this
+; exact order, on consecutive lines after an s_waitcnt and immediately
+; followed by s_setpc_b64.
+; FUNC-LABEL: {{^}}v_bitselect_i64_pat_1:
+; GCN: s_waitcnt
+; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
+; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
+; GCN-NEXT: s_setpc_b64
+define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
+ %xor.0 = xor i64 %a, %mask
+ %and = and i64 %xor.0, %b
+ %bitselect = xor i64 %and, %mask
+ ret i64 %bitselect
+}
+
+; i64 bitselect in the xor/and/xor form: ((a ^ mask) & b) ^ mask.
+; The two v_bfi_b32 lines are matched with order-independent DAG checks,
+; so either half may be emitted first.
+; NOTE(review): the IR body is identical to v_bitselect_i64_pat_1; only
+; the check lines differ. Confirm whether a distinct source pattern was
+; intended for this test.
+; FUNC-LABEL: {{^}}v_bitselect_i64_pat_2:
+; GCN: s_waitcnt
+; GCN-DAG: v_bfi_b32 v0, v2, v0, v4
+; GCN-DAG: v_bfi_b32 v1, v3, v1, v5
+; GCN-NEXT: s_setpc_b64
+define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
+ %xor.0 = xor i64 %a, %mask
+ %and = and i64 %xor.0, %b
+ %bitselect = xor i64 %and, %mask
+ ret i64 %bitselect
+}
+
+; i64 version of the expression (x & z) | (y & (x | z)), the same shape
+; this file labels "SHA-256 Ma function" for the i32 test.
+; The checks expect a v_xor_b32 and a v_bfi_b32 for each 32-bit half; all
+; four lines are order-independent DAG checks.
+; FUNC-LABEL: {{^}}v_bfi_sha256_ma_i64:
+; GCN-DAG: v_xor_b32_e32 v1, v1, v3
+; GCN-DAG: v_xor_b32_e32 v0, v0, v2
+; GCN-DAG: v_bfi_b32 v1, v1, v5, v3
+; GCN-DAG: v_bfi_b32 v0, v0, v4, v2
+define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
+entry:
+ %and0 = and i64 %x, %z
+ %or0 = or i64 %x, %z
+ %and1 = and i64 %y, %or0
+ %or1 = or i64 %and0, %and1
+ ret i64 %or1
+}
+
+; Kernel variant of the and/not/or bitselect, (a & b) | (~a & mask), on
+; i64 kernel arguments. The result is fed into an add with 10 and stored
+; through an undef global pointer so that it is used.
+; The checks record the current output: values are moved from s registers
+; into v registers and each half is selected as v_bfi_b32.
+; NOTE(review): the add is presumably there to give the bitselect a scalar
+; user (see the %scalar.use name); confirm.
+; FIXME: Should leave as 64-bit SALU ops
+; FUNC-LABEL: {{^}}s_bitselect_i64_pat_0:
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_bfi_b32
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_bfi_b32
+define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
+ %and0 = and i64 %a, %b
+ %not.a = xor i64 %a, -1
+ %and1 = and i64 %not.a, %mask
+ %bitselect = or i64 %and0, %and1
+ %scalar.use = add i64 %bitselect, 10
+ store i64 %scalar.use, i64 addrspace(1)* undef
+ ret void
+}
+
+; Kernel variant of the xor/and/xor bitselect, ((a ^ mask) & b) ^ mask, on
+; i64 kernel arguments. The result is fed into an add with 10 and stored
+; through an undef global pointer so that it is used.
+; The checks expect moves from s registers into v registers and two
+; v_bfi_b32 instructions; the first v_bfi_b32 and the third v_mov may
+; appear in either order.
+; FUNC-LABEL: {{^}}s_bitselect_i64_pat_1:
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN-DAG: v_bfi_b32
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_bfi_b32
+define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
+ %xor.0 = xor i64 %a, %mask
+ %and = and i64 %xor.0, %b
+ %bitselect = xor i64 %and, %mask
+
+ %scalar.use = add i64 %bitselect, 10
+ store i64 %scalar.use, i64 addrspace(1)* undef
+ ret void
+}
+
+; Kernel variant of the xor/and/xor bitselect, ((a ^ mask) & b) ^ mask, on
+; i64 kernel arguments. The result is fed into an add with 10 and stored
+; through an undef global pointer so that it is used.
+; NOTE(review): both the IR body and the check lines are identical to
+; s_bitselect_i64_pat_1 apart from the function name. Confirm whether a
+; distinct source pattern was intended for this test.
+; FUNC-LABEL: {{^}}s_bitselect_i64_pat_2:
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN-DAG: v_bfi_b32
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN: v_bfi_b32
+define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
+ %xor.0 = xor i64 %a, %mask
+ %and = and i64 %xor.0, %b
+ %bitselect = xor i64 %and, %mask
+
+ %scalar.use = add i64 %bitselect, 10
+ store i64 %scalar.use, i64 addrspace(1)* undef
+ ret void
+}
+
+; Kernel variant of (x & z) | (y & (x | z)) on i64 kernel arguments, the
+; same shape this file labels "SHA-256 Ma function" for the i32 test.
+; The result is fed into an add with 10 and stored through an undef global
+; pointer so that it is used.
+; The checks expect moves from s registers into v registers, two v_xor_b32
+; and two v_bfi_b32; only the first v_mov and the last v_bfi_b32 are
+; position-ordered, the rest are order-independent DAG checks.
+; FUNC-LABEL: {{^}}s_bfi_sha256_ma_i64:
+; GCN: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN-DAG: v_xor_b32
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s
+; GCN-DAG: v_xor_b32
+; GCN-DAG: v_bfi_b32
+; GCN: v_bfi_b32
+define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
+entry:
+ %and0 = and i64 %x, %z
+ %or0 = or i64 %x, %z
+ %and1 = and i64 %y, %or0
+ %or1 = or i64 %and0, %and1
+
+ %scalar.use = add i64 %or1, 10
+ store i64 %scalar.use, i64 addrspace(1)* undef
+ ret void
+}
OpenPOWER on IntegriCloud