diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 01:46:49 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-16 01:46:49 +0000 |
| commit | b6be2027794fe03afa665c7c9ae727d032fafa95 (patch) | |
| tree | b43a7e26199e324b1b53954a3eb0f157bf8ed0a6 /llvm/test | |
| parent | 190b232d73761ce785f7b3465fd5b3baf4095200 (diff) | |
| download | bcm5719-llvm-b6be2027794fe03afa665c7c9ae727d032fafa95.tar.gz bcm5719-llvm-b6be2027794fe03afa665c7c9ae727d032fafa95.zip | |
AMDGPU: Use s_addk_i32 / s_mulk_i32
llvm-svn: 266506
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fceil64.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/s_addk_i32.ll | 93 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/shl_add_constant.ll | 4 |
5 files changed, 140 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll index 579cbf435e7..6320f349e1a 100644 --- a/llvm/test/CodeGen/AMDGPU/fceil64.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll @@ -13,8 +13,8 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; CI: v_ceil_f64_e32 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: s_add_i32 [[A:s[0-9]+]], [[SEXP]], 0xfffffc01 -; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[A]] +; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01 +; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]] ; SI-DAG: s_not_b64 ; SI-DAG: s_and_b64 ; SI-DAG: cmp_gt_i32 diff --git a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll index 77852ee47e6..2fff10d2b4b 100644 --- a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll @@ -25,8 +25,8 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014 ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 -; SI-DAG: s_add_i32 [[A:s[0-9]+]], [[SEXP]], 0xfffffc01 -; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[A]] +; SI-DAG: s_addk_i32 [[SEXP]], 0xfc01 +; SI-DAG: s_lshr_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], [[SEXP]] ; SI-DAG: s_not_b64 ; SI-DAG: s_and_b64 ; SI-DAG: cmp_gt_i32 diff --git a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll new file mode 100644 index 00000000000..987056010e6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll @@ -0,0 +1,93 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: {{^}}s_addk_i32_k0: +; SI: s_load_dword [[VAL:s[0-9]+]] +; SI: s_addk_i32 [[VAL]], 0x41 +; SI: v_mov_b32_e32 
[[VRESULT:v[0-9]+]], [[VAL]] +; SI: buffer_store_dword [[VRESULT]] +; SI: s_endpgm +define void @s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) { + %add = add i32 %b, 65 + store i32 %add, i32 addrspace(1)* %out + ret void +} + +; FIXME: This should be folded with any number of uses. +; SI-LABEL: {{^}}s_addk_i32_k0_x2: +; SI: s_movk_i32 [[K:s[0-9]+]], 0x41 +; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]] +; SI-DAG: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, [[K]] +; SI: s_endpgm +define void @s_addk_i32_k0_x2(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %a, i32 %b) { + %add0 = add i32 %a, 65 + %add1 = add i32 %b, 65 + store i32 %add0, i32 addrspace(1)* %out0 + store i32 %add1, i32 addrspace(1)* %out1 + ret void +} + +; SI-LABEL: {{^}}s_addk_i32_k1: +; SI: s_addk_i32 {{s[0-9]+}}, 0x7fff{{$}} +; SI: s_endpgm +define void @s_addk_i32_k1(i32 addrspace(1)* %out, i32 %b) { + %add = add i32 %b, 32767 ; (1 << 15) - 1 + store i32 %add, i32 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_addk_i32_k2: +; SI: s_addk_i32 {{s[0-9]+}}, 0xffef{{$}} +; SI: s_endpgm +define void @s_addk_i32_k2(i32 addrspace(1)* %out, i32 %b) { + %add = add i32 %b, -17 + store i32 %add, i32 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_addk_v2i32_k0: +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42 +; SI: s_endpgm +define void @s_addk_v2i32_k0(<2 x i32> addrspace(1)* %out, <2 x i32> %b) { + %add = add <2 x i32> %b, <i32 65, i32 66> + store <2 x i32> %add, <2 x i32> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_addk_v4i32_k0: +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44 +; SI: s_endpgm +define void @s_addk_v4i32_k0(<4 x i32> addrspace(1)* %out, <4 x i32> %b) { + %add = add <4 x i32> %b, <i32 65, i32 66, i32 67, i32 68> + store <4 x i32> %add, <4 x i32> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_addk_v8i32_k0: +; 
SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x41 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x42 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x43 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x44 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x45 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x46 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x47 +; SI-DAG: s_addk_i32 {{s[0-9]+}}, 0x48 +; SI: s_endpgm +define void @s_addk_v8i32_k0(<8 x i32> addrspace(1)* %out, <8 x i32> %b) { + %add = add <8 x i32> %b, <i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72> + store <8 x i32> %add, <8 x i32> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}no_s_addk_i32_k0: +; SI: s_add_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8000{{$}} +; SI: s_endpgm +define void @no_s_addk_i32_k0(i32 addrspace(1)* %out, i32 %b) { + %add = add i32 %b, 32768 ; 1 << 15 + store i32 %add, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll new file mode 100644 index 00000000000..33d7eeacdb8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +; SI-LABEL: {{^}}s_mulk_i32_k0: +; SI: s_load_dword [[VAL:s[0-9]+]] +; SI: s_mulk_i32 [[VAL]], 0x41 +; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[VAL]] +; SI: buffer_store_dword [[VRESULT]] +; SI: s_endpgm +define void @s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) { + %mul = mul i32 %b, 65 + store i32 %mul, i32 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_mulk_i32_k1: +; SI: s_mulk_i32 {{s[0-9]+}}, 0x7fff{{$}} +; SI: s_endpgm +define void @s_mulk_i32_k1(i32 addrspace(1)* %out, i32 %b) { + %mul = mul i32 %b, 32767 ; (1 << 15) - 1 + store i32 %mul, i32 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_mulk_i32_k2: +; SI: s_mulk_i32 {{s[0-9]+}}, 0xffef{{$}} +; SI: s_endpgm +define void @s_mulk_i32_k2(i32 addrspace(1)* %out, i32 
%b) { + %mul = mul i32 %b, -17 + store i32 %mul, i32 addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}no_s_mulk_i32_k0: +; SI: s_mul_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x8001{{$}} +; SI: s_endpgm +define void @no_s_mulk_i32_k0(i32 addrspace(1)* %out, i32 %b) { + %mul = mul i32 %b, 32769 ; (1 << 15) + 1 + store i32 %mul, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll index b1a3f8fbdc6..dfeaafa7095 100644 --- a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll @@ -74,8 +74,8 @@ define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) # ; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc ; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3 ; SI: s_add_i32 [[TMP:s[0-9]+]], [[Y]], [[SHL3]] -; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8 -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] +; SI: s_addk_i32 [[TMP]], 0x3d8 +; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]] ; SI: buffer_store_dword [[VRESULT]] define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 { |

