author     Matt Arsenault <Matthew.Arsenault@amd.com>  2016-09-14 15:19:03 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2016-09-14 15:19:03 +0000
commit     fa5f767a38be412f75e4b1426356263c65291ec6 (patch)
tree       1b3e3f1f1898b68a7e466b9d503cc9cf61beff0c /llvm/test/CodeGen
parent     7246dcc880cb36e47daa120f63585d46f92e2649 (diff)
AMDGPU: Improve splitting 64-bit bit ops by constants
This addresses a TODO to handle operations besides 'and'. It also
starts eliminating no-op operations with a constant that can emerge
later.
llvm-svn: 281488
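
For background on the tests below: a 64-bit bitwise operation with a constant
operand can be split into two independent 32-bit operations on the low and high
words, and a word whose constant is the identity value (all-ones for and, zero
for or/xor) or an absorbing value (zero for and, all-ones for or) needs no ALU
instruction at all, only a copy or a materialized constant. The sketch below is
illustrative only and is not the code touched by this commit; the names
applyHalf and splitBitOp64 and the plain-integer interface are hypothetical.

// Illustrative sketch (not LLVM's implementation): splitting a 64-bit bitwise
// op with a constant RHS into two 32-bit halves, showing which halves
// degenerate into a copy or a constant.
#include <cstdint>
#include <utility>

enum class BitOp { And, Or, Xor };

std::uint32_t applyHalf(BitOp Op, std::uint32_t Val, std::uint32_t Imm) {
  switch (Op) {
  case BitOp::And:
    if (Imm == ~0u) return Val;   // and x, -1 -> x (plain copy)
    if (Imm == 0u)  return 0u;    // and x, 0  -> constant 0
    return Val & Imm;
  case BitOp::Or:
    if (Imm == 0u)  return Val;   // or x, 0   -> x (plain copy)
    if (Imm == ~0u) return ~0u;   // or x, -1  -> constant -1
    return Val | Imm;
  case BitOp::Xor:
    if (Imm == 0u)  return Val;   // xor x, 0  -> x (plain copy)
    return Val ^ Imm;             // xor x, -1 stays a single not
  }
  return Val;
}

// Apply the op per 32-bit half of a 64-bit value and immediate.
std::pair<std::uint32_t, std::uint32_t>
splitBitOp64(BitOp Op, std::uint64_t Val, std::uint64_t Imm) {
  return {applyHalf(Op, std::uint32_t(Val), std::uint32_t(Imm)),
          applyHalf(Op, std::uint32_t(Val >> 32), std::uint32_t(Imm >> 32))};
}

For example, or i64 %a, -8 only needs a 32-bit or on the low word while the
high word becomes the constant -1, and and i64 %a, -8 only needs a 32-bit and
on the low word while the high word passes through unchanged; these are exactly
the patterns the updated checks in and.ll, or.ll, and xor.ll below look for.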
Diffstat (limited to 'llvm/test/CodeGen')
 llvm/test/CodeGen/AMDGPU/and.ll                       |  14
 llvm/test/CodeGen/AMDGPU/bitreverse.ll                |   3
 llvm/test/CodeGen/AMDGPU/bswap.ll                     |   4
 llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll | 144
 llvm/test/CodeGen/AMDGPU/ctpop64.ll                   |   3
 llvm/test/CodeGen/AMDGPU/or.ll                        | 102
 llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll       |   1
 llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll            |   1
 llvm/test/CodeGen/AMDGPU/xor.ll                       |  78
 9 files changed, 342 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll
index eb0bf65d9b6..d3d3cec9bbb 100644
--- a/llvm/test/CodeGen/AMDGPU/and.ll
+++ b/llvm/test/CodeGen/AMDGPU/and.ll
@@ -324,6 +324,20 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
   ret void
 }
 
+; FIXME: Should be able to reduce load width
+; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
+; SI: buffer_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI-NOT: and
+; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
+; SI-NOT: and
+; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
+define void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, -8
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64
 ; SI: s_load_dword
 ; SI-NOT: and
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index 62e7904f438..0acaceaa6fe 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i16 @llvm.bitreverse.i16(i16) #1
@@ -79,6 +79,7 @@ define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
 }
 
 ; FUNC-LABEL: {{^}}v_brev_i64:
+; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
 define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
   %val = load i64, i64 addrspace(1)* %valptr
   %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index 4cf8e4bfed5..23b93ce2f07 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
@@ -93,6 +93,8 @@ define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(
   ret void
 }
 
+; FUNC-LABEL: {{^}}test_bswap_i64:
+; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
 define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
   %val = load i64, i64 addrspace(1)* %in, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
new file mode 100644
index 00000000000..3e167846c22
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
@@ -0,0 +1,144 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}fold_mi_v_and_0:
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
+  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %and = and i32 %size, %x
+  store i32 %and, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_and_0:
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %and = and i32 %size, %x
+  store i32 %and, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_v_or_0:
+; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
+  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %or = or i32 %size, %x
+  store i32 %or, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_or_0:
+; GCN: s_load_dword [[SVAL:s[0-9]+]]
+; GCN-NOT: [[SVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
+; GCN-NOT: [[VVAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %or = or i32 %size, %x
+  store i32 %or, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_v_xor_0:
+; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
+  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %xor = xor i32 %size, %x
+  store i32 %xor, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_xor_0:
+; GCN: s_load_dword [[SVAL:s[0-9]+]]
+; GCN-NOT: [[SVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
+; GCN-NOT: [[VVAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %xor = xor i32 %size, %x
+  store i32 %xor, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_not_0:
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %xor = xor i32 %size, -1
+  store i32 %xor, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_v_not_0:
+; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
+; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
+; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
+; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
+; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+define void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
+  %vreg = load volatile i64, i64 addrspace(1)* undef
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
+  %xor = xor i64 %ctpop, -1
+  store i64 %xor, i64 addrspace(1)* %out
+  ret void
+}
+
+; The neg1 appears after folding the not 0
+; GCN-LABEL: {{^}}fold_mi_or_neg1:
+; GCN: buffer_load_dwordx2
+; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
+
+; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
+; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
+; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
+; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
+; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+define void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
+  %vreg0 = load volatile i64, i64 addrspace(1)* undef
+  %vreg1 = load volatile i64, i64 addrspace(1)* undef
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
+  %xor = xor i64 %ctpop, -1
+  %or = or i64 %xor, %vreg1
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_and_neg1:
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_not_b32
+; GCN: v_and_b32
+; GCN-NOT: v_and_b32
+define void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
+  %vreg0 = load volatile i64, i64 addrspace(1)* undef
+  %vreg1 = load volatile i64, i64 addrspace(1)* undef
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
+  %xor = xor i64 %ctpop, -1
+  %and = and i64 %xor, %vreg1
+  store i64 %and, i64 addrspace(1)* %out
+  ret void
+}
+
+declare i64 @llvm.ctpop.i64(i64) #1
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare i32 @llvm.amdgcn.groupstaticsize() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop64.ll b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
index ee180323fa3..21f366687c7 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
@@ -39,14 +39,13 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
   ret void
 }
 
-; FIXME: or 0 should be replaxed with copy
 ; FUNC-LABEL: {{^}}v_ctpop_i64_user:
 ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
 ; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
 ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
-; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}}
+; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 ; GCN: s_endpgm
 define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index 56f54cf7c5e..3e254850a93 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 
@@ -62,6 +62,75 @@ define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
   ret void
 }
 
+; FUNC-LABEL: {{^}}scalar_or_literal_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_or_b32 s[[RES_HI:[0-9]+]], s[[HI]], 0xf237b
+; SI-DAG: s_or_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
+define void @scalar_or_literal_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = or i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_literal_multi_use_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xf237b
+; SI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x3039
+; SI: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+
+; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
+; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
+define void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+
+  %foo = add i64 %b, 4261135838621753
+  store volatile i64 %foo, i64 addrspace(1)* undef
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_inline_imm_i64:
+; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-NOT: or_b32
+; SI: s_or_b32 s[[VAL_LO]], s[[VAL_LO]], 63
+; SI-NOT: or_b32
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[VAL_LO]]
+; SI-NOT: or_b32
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
+; SI-NOT: or_b32
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+define void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = or i64 %a, 63
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_inline_imm_multi_use_i64:
+; SI-NOT: or_b32
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 63
+; SI-NOT: or_b32
+define void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, 63
+  store i64 %or, i64 addrspace(1)* %out
+  %foo = add i64 %b, 63
+  store volatile i64 %foo, i64 addrspace(1)* undef
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_neg_inline_imm_i64:
+; SI-DAG: s_load_dword [[VAL:s[0-9]+]]
+; SI-DAG: s_or_b32 [[VAL]], [[VAL]], -8
+; SI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], -1{{$}}
+; SI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[VAL]]
+; SI: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = or i64 %a, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}vector_or_literal_i32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
 define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
@@ -127,8 +196,9 @@ define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
 ; FIXME: The or 0 should really be removed.
 ; FUNC-LABEL: {{^}}vector_or_i64_imm:
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
-; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
+; SI: v_or_b32_e32 v[[LO_RESULT:[0-9]+]], 8, v[[LO_VREG]]
+; SI-NOT: v_or_b32_e32 {{v[0-9]+}}, 0
+; SI: buffer_store_dwordx2 v{{\[}}[[LO_RESULT]]:[[HI_VREG]]{{\]}}
 ; SI: s_endpgm
 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 8
@@ -137,6 +207,32 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
   ret void
 }
 
+; FUNC-LABEL: {{^}}vector_or_i64_neg_inline_imm:
+; SI-DAG: buffer_load_dword v[[LO_VREG:[0-9]+]]
+; SI-DAG: v_or_b32_e32 v[[RES_LO:[0-9]+]], -8, v[[LO_VREG]]
+; SI-DAG: v_mov_b32_e32 v[[RES_HI:[0-9]+]], -1{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[RES_LO]]:[[RES_HI]]{{\]}}
+; SI: s_endpgm
+define void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_or_i64_neg_literal:
+; SI-DAG: buffer_load_dword v[[LO_VREG:[0-9]+]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, -1{{$}}
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffffff38, v[[LO_VREG]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, -200
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}trunc_i64_or_to_i32:
 ; SI: s_load_dword s[[SREG0:[0-9]+]]
 ; SI: s_load_dword s[[SREG1:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index a6555a19738..48bbc32abcb 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -97,7 +97,6 @@ define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
 ; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
-; GCN-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], 0, v[[SHLHI]]{{$}}
 ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 
 ; GCN: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index 16eae1899ec..6e6b1e7b6de 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -38,6 +38,7 @@ define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)*
 }
 
 ; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64:
+; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,
 define void @s_sint_to_fp_v2i64(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
   %result = sitofp <2 x i64> %in to <2 x float>
   store <2 x float> %result, <2 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/xor.ll b/llvm/test/CodeGen/AMDGPU/xor.ll
index 202170d6e22..53f4c0a9174 100644
--- a/llvm/test/CodeGen/AMDGPU/xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor.ll
@@ -171,3 +171,81 @@ endif:
   store i64 %3, i64 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: {{^}}scalar_xor_literal_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_xor_b32 s[[RES_HI:[0-9]+]], s[[HI]], 0xf237b
+; SI-DAG: s_xor_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
+define void @scalar_xor_literal_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = xor i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_xor_literal_multi_use_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xf237b
+; SI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x3039
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

+; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
+; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
+define void @scalar_xor_literal_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = xor i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+
+  %foo = add i64 %b, 4261135838621753
+  store volatile i64 %foo, i64 addrspace(1)* undef
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_xor_inline_imm_i64:
+; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-NOT: xor_b32
+; SI: s_xor_b32 s[[VAL_LO]], s[[VAL_LO]], 63
+; SI-NOT: xor_b32
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[VAL_LO]]
+; SI-NOT: xor_b32
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
+; SI-NOT: xor_b32
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+define void @scalar_xor_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = xor i64 %a, 63
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_xor_neg_inline_imm_i64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI: s_xor_b64 [[VAL]], [[VAL]], -8
+define void @scalar_xor_neg_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = xor i64 %a, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_xor_i64_neg_inline_imm:
+; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: v_xor_b32_e32 {{v[0-9]+}}, -8, v[[LO_VREG]]
+; SI: v_xor_b32_e32 {{v[0-9]+}}, -1, {{.*}}
+; SI: s_endpgm
+define void @vector_xor_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = xor i64 %loada, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_xor_literal_i64:
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_xor_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
+; SI-DAG: v_xor_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
+; SI: s_endpgm
+define void @vector_xor_literal_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = xor i64 %loada, 22470723082367
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
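
The updated checks can be exercised straight from the files' own RUN lines; for
example, for or.ll (assuming llc and FileCheck from a local LLVM build are on
PATH):

  llc -march=amdgcn -mcpu=verde -verify-machineinstrs < llvm/test/CodeGen/AMDGPU/or.ll \
    | FileCheck -check-prefix=SI -check-prefix=FUNC llvm/test/CodeGen/AMDGPU/or.ll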