author     Matt Arsenault <Matthew.Arsenault@amd.com>  2016-09-14 15:19:03 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2016-09-14 15:19:03 +0000
commit     fa5f767a38be412f75e4b1426356263c65291ec6 (patch)
tree       1b3e3f1f1898b68a7e466b9d503cc9cf61beff0c /llvm/test/CodeGen
parent     7246dcc880cb36e47daa120f63585d46f92e2649 (diff)
AMDGPU: Improve splitting 64-bit bit ops by constants
This addresses a TODO to handle operations besides 'and'. It also
starts eliminating no-op operations with a constant that can emerge
later.
llvm-svn: 281488
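
For background on the tests below: a 64-bit bitwise operation with a constant
operand can be split into two independent 32-bit operations on the low and high
words, and a word whose constant is the identity value (all-ones for and, zero
for or/xor) or an absorbing value (zero for and, all-ones for or) needs no ALU
instruction at all, only a copy or a materialized constant. The sketch below is
illustrative only and is not the code touched by this commit; the names
applyHalf and splitBitOp64 and the plain-integer interface are hypothetical.

// Illustrative sketch (not LLVM's implementation): splitting a 64-bit bitwise
// op with a constant RHS into two 32-bit halves, showing which halves
// degenerate into a copy or a constant.
#include <cstdint>
#include <utility>

enum class BitOp { And, Or, Xor };

std::uint32_t applyHalf(BitOp Op, std::uint32_t Val, std::uint32_t Imm) {
  switch (Op) {
  case BitOp::And:
    if (Imm == ~0u) return Val;   // and x, -1 -> x (plain copy)
    if (Imm == 0u)  return 0u;    // and x, 0  -> constant 0
    return Val & Imm;
  case BitOp::Or:
    if (Imm == 0u)  return Val;   // or x, 0   -> x (plain copy)
    if (Imm == ~0u) return ~0u;   // or x, -1  -> constant -1
    return Val | Imm;
  case BitOp::Xor:
    if (Imm == 0u)  return Val;   // xor x, 0  -> x (plain copy)
    return Val ^ Imm;             // xor x, -1 stays a single not
  }
  return Val;
}

// Apply the op per 32-bit half of a 64-bit value and immediate.
std::pair<std::uint32_t, std::uint32_t>
splitBitOp64(BitOp Op, std::uint64_t Val, std::uint64_t Imm) {
  return {applyHalf(Op, std::uint32_t(Val), std::uint32_t(Imm)),
          applyHalf(Op, std::uint32_t(Val >> 32), std::uint32_t(Imm >> 32))};
}

For example, or i64 %a, -8 only needs a 32-bit or on the low word while the
high word becomes the constant -1, and and i64 %a, -8 only needs a 32-bit and
on the low word while the high word passes through unchanged; these are exactly
the patterns the updated checks in and.ll, or.ll, and xor.ll below look for.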
Diffstat (limited to 'llvm/test/CodeGen')
 llvm/test/CodeGen/AMDGPU/and.ll                       |  14
 llvm/test/CodeGen/AMDGPU/bitreverse.ll                |   3
 llvm/test/CodeGen/AMDGPU/bswap.ll                     |   4
 llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll | 144
 llvm/test/CodeGen/AMDGPU/ctpop64.ll                   |   3
 llvm/test/CodeGen/AMDGPU/or.ll                        | 102
 llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll       |   1
 llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll            |   1
 llvm/test/CodeGen/AMDGPU/xor.ll                       |  78
 9 files changed, 342 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll
index eb0bf65d9b6..d3d3cec9bbb 100644
--- a/llvm/test/CodeGen/AMDGPU/and.ll
+++ b/llvm/test/CodeGen/AMDGPU/and.ll
@@ -324,6 +324,20 @@ define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %apt
   ret void
 }
 
+; FIXME: Should be able to reduce load width
+; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
+; SI: buffer_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI-NOT: and
+; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
+; SI-NOT: and
+; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
+define void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, -8
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64
 ; SI: s_load_dword
 ; SI-NOT: and
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index 62e7904f438..0acaceaa6fe 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i16 @llvm.bitreverse.i16(i16) #1
@@ -79,6 +79,7 @@ define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
 }
 
 ; FUNC-LABEL: {{^}}v_brev_i64:
+; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
 define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
   %val = load i64, i64 addrspace(1)* %valptr
   %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index 4cf8e4bfed5..23b93ce2f07 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
@@ -93,6 +93,8 @@ define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(
   ret void
 }
 
+; FUNC-LABEL: {{^}}test_bswap_i64:
+; SI-NOT: v_or_b32_e64 v{{[0-9]+}}, 0, 0
 define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
   %val = load i64, i64 addrspace(1)* %in, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
new file mode 100644
index 00000000000..3e167846c22
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
@@ -0,0 +1,144 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}fold_mi_v_and_0:
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_v_and_0(i32 addrspace(1)* %out) {
+  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %and = and i32 %size, %x
+  store i32 %and, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_and_0:
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %and = and i32 %size, %x
+  store i32 %and, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_v_or_0:
+; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_v_or_0(i32 addrspace(1)* %out) {
+  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %or = or i32 %size, %x
+  store i32 %or, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_or_0:
+; GCN: s_load_dword [[SVAL:s[0-9]+]]
+; GCN-NOT: [[SVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
+; GCN-NOT: [[VVAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %or = or i32 %size, %x
+  store i32 %or, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_v_xor_0:
+; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]]
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_v_xor_0(i32 addrspace(1)* %out) {
+  %x = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %xor = xor i32 %size, %x
+  store i32 %xor, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_xor_0:
+; GCN: s_load_dword [[SVAL:s[0-9]+]]
+; GCN-NOT: [[SVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
+; GCN-NOT: [[VVAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @fold_mi_s_xor_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %xor = xor i32 %size, %x
+  store i32 %xor, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_s_not_0:
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], -1{{$}}
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 {
+  %size = call i32 @llvm.amdgcn.groupstaticsize()
+  %xor = xor i32 %size, -1
+  store i32 %xor, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_v_not_0:
+; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
+; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
+; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]]
+; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}}
+; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+define void @fold_mi_v_not_0(i64 addrspace(1)* %out) {
+  %vreg = load volatile i64, i64 addrspace(1)* undef
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg)
+  %xor = xor i64 %ctpop, -1
+  store i64 %xor, i64 addrspace(1)* %out
+  ret void
+}
+
+; The neg1 appears after folding the not 0
+; GCN-LABEL: {{^}}fold_mi_or_neg1:
+; GCN: buffer_load_dwordx2
+; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}}
+
+; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}}
+; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}}
+; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]]
+; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]]
+; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+define void @fold_mi_or_neg1(i64 addrspace(1)* %out) {
+  %vreg0 = load volatile i64, i64 addrspace(1)* undef
+  %vreg1 = load volatile i64, i64 addrspace(1)* undef
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
+  %xor = xor i64 %ctpop, -1
+  %or = or i64 %xor, %vreg1
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fold_mi_and_neg1:
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_not_b32
+; GCN: v_and_b32
+; GCN-NOT: v_and_b32
+define void @fold_mi_and_neg1(i64 addrspace(1)* %out) {
+  %vreg0 = load volatile i64, i64 addrspace(1)* undef
+  %vreg1 = load volatile i64, i64 addrspace(1)* undef
+  %ctpop = call i64 @llvm.ctpop.i64(i64 %vreg0)
+  %xor = xor i64 %ctpop, -1
+  %and = and i64 %xor, %vreg1
+  store i64 %and, i64 addrspace(1)* %out
+  ret void
+}
+
+declare i64 @llvm.ctpop.i64(i64) #1
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare i32 @llvm.amdgcn.groupstaticsize() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop64.ll b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
index ee180323fa3..21f366687c7 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
@@ -39,14 +39,13 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
   ret void
 }
 
-; FIXME: or 0 should be replaxed with copy
 ; FUNC-LABEL: {{^}}v_ctpop_i64_user:
 ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
 ; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
 ; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
 ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
-; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}}
+; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 ; GCN: s_endpgm
 define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index 56f54cf7c5e..3e254850a93 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 
@@ -62,6 +62,75 @@ define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
   ret void
 }
 
+; FUNC-LABEL: {{^}}scalar_or_literal_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_or_b32 s[[RES_HI:[0-9]+]], s[[HI]], 0xf237b
+; SI-DAG: s_or_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
+define void @scalar_or_literal_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = or i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_literal_multi_use_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xf237b
+; SI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x3039
+; SI: s_or_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+
+; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
+; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
+define void @scalar_or_literal_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+
+  %foo = add i64 %b, 4261135838621753
+  store volatile i64 %foo, i64 addrspace(1)* undef
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_inline_imm_i64:
+; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-NOT: or_b32
+; SI: s_or_b32 s[[VAL_LO]], s[[VAL_LO]], 63
+; SI-NOT: or_b32
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[VAL_LO]]
+; SI-NOT: or_b32
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
+; SI-NOT: or_b32
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+define void @scalar_or_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = or i64 %a, 63
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_inline_imm_multi_use_i64:
+; SI-NOT: or_b32
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 63
+; SI-NOT: or_b32
+define void @scalar_or_inline_imm_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = or i64 %a, 63
+  store i64 %or, i64 addrspace(1)* %out
+  %foo = add i64 %b, 63
+  store volatile i64 %foo, i64 addrspace(1)* undef
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_or_neg_inline_imm_i64:
+; SI-DAG: s_load_dword [[VAL:s[0-9]+]]
+; SI-DAG: s_or_b32 [[VAL]], [[VAL]], -8
+; SI-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], -1{{$}}
+; SI-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], [[VAL]]
+; SI: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @scalar_or_neg_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = or i64 %a, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}vector_or_literal_i32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
 define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
@@ -127,8 +196,9 @@ define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
 ; FIXME: The or 0 should really be removed.
 ; FUNC-LABEL: {{^}}vector_or_i64_imm:
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
-; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
+; SI: v_or_b32_e32 v[[LO_RESULT:[0-9]+]], 8, v[[LO_VREG]]
+; SI-NOT: v_or_b32_e32 {{v[0-9]+}}, 0
+; SI: buffer_store_dwordx2 v{{\[}}[[LO_RESULT]]:[[HI_VREG]]{{\]}}
 ; SI: s_endpgm
 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64, i64 addrspace(1)* %a, align 8
@@ -137,6 +207,32 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
   ret void
 }
 
+; FUNC-LABEL: {{^}}vector_or_i64_neg_inline_imm:
+; SI-DAG: buffer_load_dword v[[LO_VREG:[0-9]+]]
+; SI-DAG: v_or_b32_e32 v[[RES_LO:[0-9]+]], -8, v[[LO_VREG]]
+; SI-DAG: v_mov_b32_e32 v[[RES_HI:[0-9]+]], -1{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[RES_LO]]:[[RES_HI]]{{\]}}
+; SI: s_endpgm
+define void @vector_or_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_or_i64_neg_literal:
+; SI-DAG: buffer_load_dword v[[LO_VREG:[0-9]+]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, -1{{$}}
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 0xffffff38, v[[LO_VREG]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @vector_or_i64_neg_literal(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = or i64 %loada, -200
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}trunc_i64_or_to_i32:
 ; SI: s_load_dword s[[SREG0:[0-9]+]]
 ; SI: s_load_dword s[[SREG1:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index a6555a19738..48bbc32abcb 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -97,7 +97,6 @@ define void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)*
 ; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]]
-; GCN-DAG: v_or_b32_e32 v[[OR1:[0-9]+]], 0, v[[SHLHI]]{{$}}
 ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 
 ; GCN: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index 16eae1899ec..6e6b1e7b6de 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -38,6 +38,7 @@ define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)*
 }
 
 ; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64:
+; GCN-NOT: v_and_b32_e32 v{{[0-9]+}}, -1,
 define void @s_sint_to_fp_v2i64(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0{
   %result = sitofp <2 x i64> %in to <2 x float>
   store <2 x float> %result, <2 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/xor.ll b/llvm/test/CodeGen/AMDGPU/xor.ll
index 202170d6e22..53f4c0a9174 100644
--- a/llvm/test/CodeGen/AMDGPU/xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor.ll
@@ -171,3 +171,81 @@ endif:
   store i64 %3, i64 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: {{^}}scalar_xor_literal_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_xor_b32 s[[RES_HI:[0-9]+]], s[[HI]], 0xf237b
+; SI-DAG: s_xor_b32 s[[RES_LO:[0-9]+]], s[[LO]], 0x3039
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_LO]]
+; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[RES_HI]]
+define void @scalar_xor_literal_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = xor i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_xor_literal_multi_use_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0xf237b
+; SI-DAG: s_movk_i32 s[[K_LO:[0-9]+]], 0x3039
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}

+; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_LO]]
+; SI: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, s[[K_HI]]
+define void @scalar_xor_literal_multi_use_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %or = xor i64 %a, 4261135838621753
+  store i64 %or, i64 addrspace(1)* %out
+
+  %foo = add i64 %b, 4261135838621753
+  store volatile i64 %foo, i64 addrspace(1)* undef
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_xor_inline_imm_i64:
+; SI: s_load_dwordx2 s{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-NOT: xor_b32
+; SI: s_xor_b32 s[[VAL_LO]], s[[VAL_LO]], 63
+; SI-NOT: xor_b32
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[VAL_LO]]
+; SI-NOT: xor_b32
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[VAL_HI]]
+; SI-NOT: xor_b32
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
+define void @scalar_xor_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = xor i64 %a, 63
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}scalar_xor_neg_inline_imm_i64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI: s_xor_b64 [[VAL]], [[VAL]], -8
+define void @scalar_xor_neg_inline_imm_i64(i64 addrspace(1)* %out, i64 %a) {
+  %or = xor i64 %a, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_xor_i64_neg_inline_imm:
+; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: v_xor_b32_e32 {{v[0-9]+}}, -8, v[[LO_VREG]]
+; SI: v_xor_b32_e32 {{v[0-9]+}}, -1, {{.*}}
+; SI: s_endpgm
+define void @vector_xor_i64_neg_inline_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = xor i64 %loada, -8
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_xor_literal_i64:
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_xor_b32_e32 {{v[0-9]+}}, 0xdf77987f, v[[LO_VREG]]
+; SI-DAG: v_xor_b32_e32 {{v[0-9]+}}, 0x146f, v[[HI_VREG]]
+; SI: s_endpgm
+define void @vector_xor_literal_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %or = xor i64 %loada, 22470723082367
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
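
The updated checks can be exercised straight from the files' own RUN lines; for
example, for or.ll (assuming llc and FileCheck from a local LLVM build are on
PATH):

  llc -march=amdgcn -mcpu=verde -verify-machineinstrs < llvm/test/CodeGen/AMDGPU/or.ll \
    | FileCheck -check-prefix=SI -check-prefix=FUNC llvm/test/CodeGen/AMDGPU/or.ll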