diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/andorn2.ll | 103 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fceil64.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/nand.ll | 83 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/nor.ll | 83 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/xnor.ll | 33 |
8 files changed, 304 insertions, 12 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/andorn2.ll b/llvm/test/CodeGen/AMDGPU/andorn2.ll new file mode 100644 index 00000000000..390c103f367 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/andorn2.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s +; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s +; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX801 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s + +; GCN-LABEL: {{^}}scalar_andn2_i32_one_use +; GCN: s_andn2_b32 +define amdgpu_kernel void @scalar_andn2_i32_one_use( + i32 addrspace(1)* %r0, i32 %a, i32 %b) { +entry: + %nb = xor i32 %b, -1 + %r0.val = and i32 %a, %nb + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_andn2_i64_one_use +; GCN: s_andn2_b64 +define amdgpu_kernel void @scalar_andn2_i64_one_use( + i64 addrspace(1)* %r0, i64 %a, i64 %b) { +entry: + %nb = xor i64 %b, -1 + %r0.val = and i64 %a, %nb + store i64 %r0.val, i64 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_orn2_i32_one_use +; GCN: s_orn2_b32 +define amdgpu_kernel void @scalar_orn2_i32_one_use( + i32 addrspace(1)* %r0, i32 %a, i32 %b) { +entry: + %nb = xor i32 %b, -1 + %r0.val = or i32 %a, %nb + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_orn2_i64_one_use +; GCN: s_orn2_b64 +define amdgpu_kernel void @scalar_orn2_i64_one_use( + i64 addrspace(1)* %r0, i64 %a, i64 %b) { +entry: + %nb = xor i64 %b, -1 + %r0.val = or i64 %a, %nb + store i64 %r0.val, i64 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}vector_andn2_i32_s_v_one_use +; GCN: v_not_b32 +; GCN: v_and_b32 +define amdgpu_kernel void @vector_andn2_i32_s_v_one_use( + i32 addrspace(1)* %r0, i32 %s) { +entry: + %v = call i32 @llvm.amdgcn.workitem.id.x() #1 + %not = xor i32 %v, -1 + %r0.val = and i32 %s, %not + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}vector_andn2_i32_v_s_one_use +; GCN: s_not_b32 +; GCN: v_and_b32 +define amdgpu_kernel void @vector_andn2_i32_v_s_one_use( + i32 addrspace(1)* %r0, i32 %s) { +entry: + %v = call i32 @llvm.amdgcn.workitem.id.x() #1 + %not = xor i32 %s, -1 + %r0.val = and i32 %v, %not + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}vector_orn2_i32_s_v_one_use +; GCN: v_not_b32 +; GCN: v_or_b32 +define amdgpu_kernel void @vector_orn2_i32_s_v_one_use( + i32 addrspace(1)* %r0, i32 %s) { +entry: + %v = call i32 @llvm.amdgcn.workitem.id.x() #1 + %not = xor i32 %v, -1 + %r0.val = or i32 %s, %not + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}vector_orn2_i32_v_s_one_use +; GCN: s_not_b32 +; GCN: v_or_b32 +define amdgpu_kernel void @vector_orn2_i32_v_s_one_use( + i32 addrspace(1)* %r0, i32 %s) { +entry: + %v = call i32 @llvm.amdgcn.workitem.id.x() #1 + %not = xor i32 %s, -1 + %r0.val = or i32 %v, %not + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #0 diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll index 09d4b2c8bd7..8611cd080e1 100644 --- a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll @@ -107,7 +107,7 @@ define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) { ; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} ; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} ; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]] -; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]], v[[VREG1_LO]] +; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]] ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]] ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} define amdgpu_kernel void @fold_mi_or_neg1(i64 addrspace(1)* %out) { diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll index ba26ed23832..da852af3f23 100644 --- a/llvm/test/CodeGen/AMDGPU/fceil64.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll @@ -17,8 +17,7 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; are not always followed. ; SI-DAG: s_add_i32 [[SEXP0:s[0-9]+]], [[SEXP]], 0xfffffc01 ; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP0]] -; SI-DAG: s_not_b64 -; SI-DAG: s_and_b64 +; SI-DAG: s_andn2_b64 ; SI-DAG: cmp_gt_i32 ; SI-DAG: cndmask_b32 ; SI-DAG: cndmask_b32 diff --git a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll index 6fc4c8b7d24..226125335c3 100644 --- a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll @@ -27,8 +27,7 @@ define amdgpu_kernel void @v_ftrunc_f64(double addrspace(1)* %out, double addrsp ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 ; SI-DAG: s_add_i32 [[SEXP1:s[0-9]+]], [[SEXP]], 0xfffffc01 ; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP1]] -; SI-DAG: s_not_b64 -; SI-DAG: s_and_b64 +; SI-DAG: s_andn2_b64 ; SI-DAG: cmp_gt_i32 ; SI-DAG: cndmask_b32 ; SI-DAG: cndmask_b32 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index aca576cbde6..cf19486dfca 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -266,8 +266,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %ou ; VI: v_mov_b32_e32 [[V_LOAD:v[0-9]+]], [[LOAD]] ; VI: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 3 ; VI: s_lshl_b32 [[SHIFTED_MASK:s[0-9]+]], 0xffff, [[SCALED_IDX]] -; VI: s_not_b32 [[NOT_MASK:s[0-9]+]], [[SHIFTED_MASK]] -; VI: s_and_b32 [[AND_NOT_MASK:s[0-9]+]], [[NOT_MASK]], [[LOAD]] +; VI: s_andn2_b32 [[AND_NOT_MASK:s[0-9]+]], [[LOAD]], [[SHIFTED_MASK]] ; VI: v_bfi_b32 [[BFI:v[0-9]+]], [[SHIFTED_MASK]], 5, [[V_LOAD]] ; VI: s_lshr_b32 [[HI2:s[0-9]+]], [[AND_NOT_MASK]], 16 @@ -306,8 +305,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %ou ; VI-DAG: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 3 ; VI-DAG: s_mov_b32 s[[MASK_LO:[0-9]+]], 0xffff ; VI: s_lshl_b64 s{{\[}}[[MASK_SHIFT_LO:[0-9]+]]:[[MASK_SHIFT_HI:[0-9]+]]{{\]}}, s{{\[}}[[MASK_LO]]:[[MASK_HI]]{{\]}}, [[SCALED_IDX]] -; VI: s_not_b64 [[NOT_MASK:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[MASK_SHIFT_LO]]:[[MASK_SHIFT_HI]]{{\]}} -; VI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[NOT_MASK]], [[VEC]] +; VI: s_andn2_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[VEC]], s{{\[}}[[MASK_SHIFT_LO]]:[[MASK_SHIFT_HI]]{{\]}} ; VI: s_and_b32 s[[INS:[0-9]+]], s[[MASK_SHIFT_LO]], 5 ; VI: s_or_b64 s{{\[}}[[RESULT0:[0-9]+]]:[[RESULT1:[0-9]+]]{{\]}}, s{{\[}}[[INS]]:[[MASK_HI]]{{\]}}, [[AND]] ; VI: v_mov_b32_e32 v[[V_RESULT0:[0-9]+]], s[[RESULT0]] diff --git a/llvm/test/CodeGen/AMDGPU/nand.ll b/llvm/test/CodeGen/AMDGPU/nand.ll new file mode 100644 index 00000000000..be7d9f677ec --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/nand.ll @@ -0,0 +1,83 @@ +; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s +; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s +; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX801 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s + +; GCN-LABEL: {{^}}scalar_nand_i32_one_use +; GCN: s_nand_b32 +define amdgpu_kernel void @scalar_nand_i32_one_use( + i32 addrspace(1)* %r0, i32 %a, i32 %b) { +entry: + %and = and i32 %a, %b + %r0.val = xor i32 %and, -1 + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_nand_i32_mul_use +; GCN-NOT: s_nand_b32 +; GCN: s_and_b32 +; GCN: s_not_b32 +; GCN: s_add_i32 +define amdgpu_kernel void @scalar_nand_i32_mul_use( + i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) { +entry: + %and = and i32 %a, %b + %r0.val = xor i32 %and, -1 + %r1.val = add i32 %and, %a + store i32 %r0.val, i32 addrspace(1)* %r0 + store i32 %r1.val, i32 addrspace(1)* %r1 + ret void +} + +; GCN-LABEL: {{^}}scalar_nand_i64_one_use +; GCN: s_nand_b64 +define amdgpu_kernel void @scalar_nand_i64_one_use( + i64 addrspace(1)* %r0, i64 %a, i64 %b) { +entry: + %and = and i64 %a, %b + %r0.val = xor i64 %and, -1 + store i64 %r0.val, i64 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_nand_i64_mul_use +; GCN-NOT: s_nand_b64 +; GCN: s_and_b64 +; GCN: s_not_b64 +; GCN: s_add_u32 +; GCN: s_addc_u32 +define amdgpu_kernel void @scalar_nand_i64_mul_use( + i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) { +entry: + %and = and i64 %a, %b + %r0.val = xor i64 %and, -1 + %r1.val = add i64 %and, %a + store i64 %r0.val, i64 addrspace(1)* %r0 + store i64 %r1.val, i64 addrspace(1)* %r1 + ret void +} + +; GCN-LABEL: {{^}}vector_nand_i32_one_use +; GCN-NOT: s_nand_b32 +; GCN: v_and_b32 +; GCN: v_not_b32 +define i32 @vector_nand_i32_one_use(i32 %a, i32 %b) { +entry: + %and = and i32 %a, %b + %r = xor i32 %and, -1 + ret i32 %r +} + +; GCN-LABEL: {{^}}vector_nand_i64_one_use +; GCN-NOT: s_nand_b64 +; GCN: v_and_b32 +; GCN: v_and_b32 +; GCN: v_not_b32 +; GCN: v_not_b32 +define i64 @vector_nand_i64_one_use(i64 %a, i64 %b) { +entry: + %and = and i64 %a, %b + %r = xor i64 %and, -1 + ret i64 %r +} diff --git a/llvm/test/CodeGen/AMDGPU/nor.ll b/llvm/test/CodeGen/AMDGPU/nor.ll new file mode 100644 index 00000000000..8fddd39cad3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/nor.ll @@ -0,0 +1,83 @@ +; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s +; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s +; RUN: llc -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX801 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s + +; GCN-LABEL: {{^}}scalar_nor_i32_one_use +; GCN: s_nor_b32 +define amdgpu_kernel void @scalar_nor_i32_one_use( + i32 addrspace(1)* %r0, i32 %a, i32 %b) { +entry: + %or = or i32 %a, %b + %r0.val = xor i32 %or, -1 + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_nor_i32_mul_use +; GCN-NOT: s_nor_b32 +; GCN: s_or_b32 +; GCN: s_not_b32 +; GCN: s_add_i32 +define amdgpu_kernel void @scalar_nor_i32_mul_use( + i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) { +entry: + %or = or i32 %a, %b + %r0.val = xor i32 %or, -1 + %r1.val = add i32 %or, %a + store i32 %r0.val, i32 addrspace(1)* %r0 + store i32 %r1.val, i32 addrspace(1)* %r1 + ret void +} + +; GCN-LABEL: {{^}}scalar_nor_i64_one_use +; GCN: s_nor_b64 +define amdgpu_kernel void @scalar_nor_i64_one_use( + i64 addrspace(1)* %r0, i64 %a, i64 %b) { +entry: + %or = or i64 %a, %b + %r0.val = xor i64 %or, -1 + store i64 %r0.val, i64 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_nor_i64_mul_use +; GCN-NOT: s_nor_b64 +; GCN: s_or_b64 +; GCN: s_not_b64 +; GCN: s_add_u32 +; GCN: s_addc_u32 +define amdgpu_kernel void @scalar_nor_i64_mul_use( + i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) { +entry: + %or = or i64 %a, %b + %r0.val = xor i64 %or, -1 + %r1.val = add i64 %or, %a + store i64 %r0.val, i64 addrspace(1)* %r0 + store i64 %r1.val, i64 addrspace(1)* %r1 + ret void +} + +; GCN-LABEL: {{^}}vector_nor_i32_one_use +; GCN-NOT: s_nor_b32 +; GCN: v_or_b32 +; GCN: v_not_b32 +define i32 @vector_nor_i32_one_use(i32 %a, i32 %b) { +entry: + %or = or i32 %a, %b + %r = xor i32 %or, -1 + ret i32 %r +} + +; GCN-LABEL: {{^}}vector_nor_i64_one_use +; GCN-NOT: s_nor_b64 +; GCN: v_or_b32 +; GCN: v_or_b32 +; GCN: v_not_b32 +; GCN: v_not_b32 +define i64 @vector_nor_i64_one_use(i64 %a, i64 %b) { +entry: + %or = or i64 %a, %b + %r = xor i64 %or, -1 + ret i64 %r +} diff --git a/llvm/test/CodeGen/AMDGPU/xnor.ll b/llvm/test/CodeGen/AMDGPU/xnor.ll index 0371cc68f04..103cb3487ca 100644 --- a/llvm/test/CodeGen/AMDGPU/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/xnor.ll @@ -61,8 +61,8 @@ entry: ; GCN-LABEL: {{^}}vector_xnor_i32_one_use ; GCN-NOT: s_xnor_b32 -; GCN: v_xor_b32 ; GCN: v_not_b32 +; GCN: v_xor_b32 ; GCN-DL: v_xnor_b32 define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) { entry: @@ -73,10 +73,10 @@ entry: ; GCN-LABEL: {{^}}vector_xnor_i64_one_use ; GCN-NOT: s_xnor_b64 -; GCN: v_xor_b32 -; GCN: v_xor_b32 ; GCN: v_not_b32 +; GCN: v_xor_b32 ; GCN: v_not_b32 +; GCN: v_xor_b32 ; GCN-DL: v_xnor_b32 ; GCN-DL: v_xnor_b32 define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) { @@ -85,3 +85,30 @@ entry: %r = xor i64 %xor, -1 ret i64 %r } + +; GCN-LABEL: {{^}}xnor_s_v_i32_one_use +; GCN-NOT: s_xnor_b32 +; GCN: s_not_b32 +; GCN: v_xor_b32 +define amdgpu_kernel void @xnor_s_v_i32_one_use(i32 addrspace(1)* %out, i32 %s) { + %v = call i32 @llvm.amdgcn.workitem.id.x() #1 + %xor = xor i32 %s, %v + %d = xor i32 %xor, -1 + store i32 %d, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}xnor_v_s_i32_one_use +; GCN-NOT: s_xnor_b32 +; GCN: s_not_b32 +; GCN: v_xor_b32 +define amdgpu_kernel void @xnor_v_s_i32_one_use(i32 addrspace(1)* %out, i32 %s) { + %v = call i32 @llvm.amdgcn.workitem.id.x() #1 + %xor = xor i32 %v, %s + %d = xor i32 %xor, -1 + store i32 %d, i32 addrspace(1)* %out + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #0 |