diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-01-21 18:18:25 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-01-21 18:18:25 +0000 |
commit | b00554886f3ad7cd5a65a8955230bae1ed8c48e4 (patch) | |
tree | 963e88f49e56d521fd05960d11ab610cedea1968 /llvm/test | |
parent | 94269db8bacad503d6cf6472c610990bee665a14 (diff) | |
download | bcm5719-llvm-b00554886f3ad7cd5a65a8955230bae1ed8c48e4.tar.gz bcm5719-llvm-b00554886f3ad7cd5a65a8955230bae1ed8c48e4.zip |
R600/SI: Custom lower fround
This fixes it for SI. It also removes the pattern
used previously for Evergreen for f32. I'm not sure
if the new R600 output is better or not, but it uses
1 fewer instructions if BFI is available.
llvm-svn: 226682
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/R600/llvm.round.f64.ll | 74 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/llvm.round.ll | 77 |
2 files changed, 124 insertions, 27 deletions
diff --git a/llvm/test/CodeGen/R600/llvm.round.f64.ll b/llvm/test/CodeGen/R600/llvm.round.f64.ll new file mode 100644 index 00000000000..404cb0f7ec0 --- /dev/null +++ b/llvm/test/CodeGen/R600/llvm.round.f64.ll @@ -0,0 +1,74 @@ +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}round_f64: +; SI: s_endpgm +define void @round_f64(double addrspace(1)* %out, double %x) #0 { + %result = call double @llvm.round.f64(double %x) #1 + store double %result, double addrspace(1)* %out + ret void +} + +; This is a pretty large function, so just test a few of the +; instructions that are necessary. + +; FUNC-LABEL: {{^}}v_round_f64: +; SI: buffer_load_dwordx2 +; SI: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11 + +; SI: v_not_b32_e32 +; SI: v_not_b32_e32 + +; SI: v_cmp_eq_i32 + +; SI: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff +; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]] + +; SI: v_cmp_lt_i32_e64 +; SI: v_cmp_gt_i32_e64 + + +; SI: buffer_store_dwordx2 +; SI: s_endpgm +define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep = getelementptr double addrspace(1)* %in, i32 %tid + %out.gep = getelementptr double addrspace(1)* %out, i32 %tid + %x = load double addrspace(1)* %gep + %result = call double @llvm.round.f64(double %x) #1 + store double %result, double addrspace(1)* %out.gep + ret void +} + +; FUNC-LABEL: {{^}}round_v2f64: +; SI: s_endpgm +define void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 { + %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1 + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}round_v4f64: +; SI: s_endpgm +define void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 { + %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1 + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void 
+} + +; FUNC-LABEL: {{^}}round_v8f64: +; SI: s_endpgm +define void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 { + %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1 + store <8 x double> %result, <8 x double> addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.tidig.x() #1 + +declare double @llvm.round.f64(double) #1 +declare <2 x double> @llvm.round.v2f64(<2 x double>) #1 +declare <4 x double> @llvm.round.v4f64(<4 x double>) #1 +declare <8 x double> @llvm.round.v8f64(<8 x double>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/llvm/test/CodeGen/R600/llvm.round.ll b/llvm/test/CodeGen/R600/llvm.round.ll index bedf4ba72ae..109f4c76651 100644 --- a/llvm/test/CodeGen/R600/llvm.round.ll +++ b/llvm/test/CodeGen/R600/llvm.round.ll @@ -1,17 +1,27 @@ -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600 --check-prefix=FUNC - -; FUNC-LABEL: {{^}}f32: -; R600: FRACT {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]] -; R600-DAG: ADD {{.*}}, -0.5 -; R600-DAG: CEIL {{.*}} [[ARG]] -; R600-DAG: FLOOR {{.*}} [[ARG]] -; R600-DAG: CNDGE -; R600-DAG: CNDGT -; R600: CNDGE {{[^,]+}}, [[ARG]] -define void @f32(float addrspace(1)* %out, float %in) { -entry: - %0 = call float @llvm.round.f32(float %in) - store float %0, float addrspace(1)* %out +; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}round_f32: +; SI-DAG: s_load_dword [[SX:s[0-9]+]] +; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]] +; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff +; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]] +; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]] +; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]] +; SI: v_cmp_ge_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SUB]]|, 0.5 +; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]] +; SI: 
v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]] +; SI: buffer_store_dword [[RESULT]] + +; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]] +; R600-DAG: ADD {{.*}}, +; R600-DAG: BFI_INT +; R600-DAG: SETGE +; R600-DAG: CNDE +; R600-DAG: ADD +define void @round_f32(float addrspace(1)* %out, float %x) #0 { + %result = call float @llvm.round.f32(float %x) #1 + store float %result, float addrspace(1)* %out ret void } @@ -20,24 +30,37 @@ entry: ; a test for the scalar case, so the vector tests just check that the ; compiler doesn't crash. -; FUNC-LABEL: v2f32 +; FUNC-LABEL: {{^}}round_v2f32: +; SI: s_endpgm ; R600: CF_END -define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { -entry: - %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in) - store <2 x float> %0, <2 x float> addrspace(1)* %out +define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 { + %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1 + store <2 x float> %result, <2 x float> addrspace(1)* %out ret void } -; FUNC-LABEL: v4f32 +; FUNC-LABEL: {{^}}round_v4f32: +; SI: s_endpgm ; R600: CF_END -define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { -entry: - %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in) - store <4 x float> %0, <4 x float> addrspace(1)* %out +define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 { + %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1 + store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } -declare float @llvm.round.f32(float) -declare <2 x float> @llvm.round.v2f32(<2 x float>) -declare <4 x float> @llvm.round.v4f32(<4 x float>) +; FUNC-LABEL: {{^}}round_v8f32: +; SI: s_endpgm +; R600: CF_END +define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 { + %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1 + store <8 x float> %result, <8 x float> addrspace(1)* %out + ret void +} + +declare float 
@llvm.round.f32(float) #1 +declare <2 x float> @llvm.round.v2f32(<2 x float>) #1 +declare <4 x float> @llvm.round.v4f32(<4 x float>) #1 +declare <8 x float> @llvm.round.v8f32(<8 x float>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } |