diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 22:09:04 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-01-18 22:09:04 +0000 |
commit | 33e3ecee0c44169f11d1fc4472250c1346e68a4e (patch) | |
tree | aefe8d1832cb203a53914cc47e3ecd8bb4bf5432 /llvm/test | |
parent | 6e3a45193aa927de0d4a677594f74c31cfc26727 (diff) | |
download | bcm5719-llvm-33e3ecee0c44169f11d1fc4472250c1346e68a4e.tar.gz bcm5719-llvm-33e3ecee0c44169f11d1fc4472250c1346e68a4e.zip |
AMDGPU: Reduce 64-bit SRAs
llvm-svn: 258096
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sra.ll | 33 |
3 files changed, 30 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll index 2fab00377d9..b47e68a01fa 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-i64-opts.ll @@ -105,10 +105,18 @@ define void @shl_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ; ashr (i64 x), 63 => (ashr lo(x), 31), lo(x) -; GCN-LABEL: {{^}}ashr_i64_const_gt_32: -define void @ashr_i64_const_gt_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { +; GCN-LABEL: {{^}}ashr_i64_const_32: +define void @ashr_i64_const_32(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %val = load i64, i64 addrspace(1)* %in - %shl = ashr i64 %val, 35 + %shl = ashr i64 %val, 32 + store i64 %shl, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}ashr_i64_const_63: +define void @ashr_i64_const_63(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %val = load i64, i64 addrspace(1)* %in + %shl = ashr i64 %val, 63 store i64 %shl, i64 addrspace(1)* %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll index 138b93b16d8..1d75c49a402 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll @@ -13,8 +13,7 @@ define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 { ; FUNC-LABEL: {{^}}v_sint_to_fp_i64_to_f32: ; GCN: {{buffer|flat}}_load_dwordx2 -; SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 63 -; VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\]}}, 63, {{v\[[0-9]+:[0-9]+\]}} +; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}} ; GCN: v_xor_b32 ; GCN: v_ffbh_u32 diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll index b3ec6bdcd1b..bf1de020219 100644 --- a/llvm/test/CodeGen/AMDGPU/sra.ll +++ b/llvm/test/CodeGen/AMDGPU/sra.ll @@ -201,10 +201,10 @@ define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %i } ; GCN-LABEL: {{^}}s_ashr_32_i64: -; GCN: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; GCN: s_ashr_i64 [[SHIFT:s\[[0-9]+:[0-9]+\]]], [[VAL]], 32 -; GCN: s_add_u32 -; GCN: s_addc_u32 +; GCN: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} +; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31 +; GCN: s_add_u32 s{{[0-9]+}}, s[[HI]], s{{[0-9]+}} +; GCN: s_addc_u32 s{{[0-9]+}}, s[[SHIFT]], s{{[0-9]+}} define void @s_ashr_32_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { %result = ashr i64 %a, 32 %add = add i64 %result, %b @@ -213,10 +213,10 @@ define void @s_ashr_32_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { } ; GCN-LABEL: {{^}}v_ashr_32_i64: -; GCN: {{buffer|flat}}_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] -; SI: v_ashr_i64 [[SHIFT:v\[[0-9]+:[0-9]+\]]], [[VAL]], 32 -; VI: v_ashrrev_i64 [[SHIFT:v\[[0-9]+:[0-9]+\]]], 32, [[VAL]] -; GCN: {{buffer|flat}}_store_dwordx2 [[SHIFT]] +; SI: buffer_load_dword v[[HI:[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; VI: flat_load_dword v[[HI:[0-9]+]] +; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]] +; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[HI]]:[[SHIFT]]{{\]}} define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %tid = call i32 @llvm.r600.read.tidig.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid @@ -228,9 +228,11 @@ define void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { } ; GCN-LABEL: {{^}}s_ashr_63_i64: -; GCN-DAG: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; GCN: s_ashr_i64 [[SHIFT:s\[[0-9]+:[0-9]+\]]], [[VAL]], 63 -; GCN: s_add_u32 +; GCN-DAG: s_load_dword s[[HI:[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}} +; GCN: s_ashr_i32 s[[SHIFT:[0-9]+]], s[[HI]], 31 +; GCN: s_mov_b32 s[[COPYSHIFT:[0-9]+]], s[[SHIFT]] +; GCN: s_add_u32 {{s[0-9]+}}, s[[HI]], {{s[0-9]+}} +; GCN: s_addc_u32 {{s[0-9]+}}, s[[COPYSHIFT]], {{s[0-9]+}} define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { %result = ashr i64 %a, 63 %add = add i64 %result, %b @@ -239,10 +241,11 @@ define void @s_ashr_63_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { } ; GCN-LABEL: {{^}}v_ashr_63_i64: -; GCN-DAG: {{buffer|flat}}_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] -; SI: v_ashr_i64 [[SHIFT:v\[[0-9]+:[0-9]+\]]], [[VAL]], 63 -; VI: v_ashrrev_i64 [[SHIFT:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]] -; GCN: {{buffer|flat}}_store_dwordx2 [[SHIFT]] +; SI: buffer_load_dword v[[HI:[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; VI: flat_load_dword v[[HI:[0-9]+]] +; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]] +; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]] +; GCN: {{buffer|flat}}_store_dwordx2 v{{\[}}[[SHIFT]]:[[COPY]]{{\]}} define void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %tid = call i32 @llvm.r600.read.tidig.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid |