summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author Matt Arsenault <Matthew.Arsenault@amd.com> 2015-01-21 18:18:25 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com> 2015-01-21 18:18:25 +0000
commit b00554886f3ad7cd5a65a8955230bae1ed8c48e4 (patch)
tree 963e88f49e56d521fd05960d11ab610cedea1968 /llvm/test
parent 94269db8bacad503d6cf6472c610990bee665a14 (diff)
download bcm5719-llvm-b00554886f3ad7cd5a65a8955230bae1ed8c48e4.tar.gz
bcm5719-llvm-b00554886f3ad7cd5a65a8955230bae1ed8c48e4.zip
R600/SI: Custom lower fround
This fixes it for SI. It also removes the pattern used previously for Evergreen for f32. I'm not sure if the new R600 output is better or not, but it uses one fewer instruction if BFI is available. (llvm-svn: 226682)
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/R600/llvm.round.f64.ll 74
-rw-r--r-- llvm/test/CodeGen/R600/llvm.round.ll 77
2 files changed, 124 insertions, 27 deletions
diff --git a/llvm/test/CodeGen/R600/llvm.round.f64.ll b/llvm/test/CodeGen/R600/llvm.round.f64.ll
new file mode 100644
index 00000000000..404cb0f7ec0
--- /dev/null
+++ b/llvm/test/CodeGen/R600/llvm.round.f64.ll
@@ -0,0 +1,74 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}round_f64:
+; SI: s_endpgm
+define void @round_f64(double addrspace(1)* %out, double %x) #0 {
+ %result = call double @llvm.round.f64(double %x) #1
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; This is a pretty large function, so just test a few of the
+; instructions that are necessary.
+
+; FUNC-LABEL: {{^}}v_round_f64:
+; SI: buffer_load_dwordx2
+; SI: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11
+
+; SI: v_not_b32_e32
+; SI: v_not_b32_e32
+
+; SI: v_cmp_eq_i32
+
+; SI: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
+
+; SI: v_cmp_lt_i32_e64
+; SI: v_cmp_gt_i32_e64
+
+
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr double addrspace(1)* %out, i32 %tid
+ %x = load double addrspace(1)* %gep
+ %result = call double @llvm.round.f64(double %x) #1
+ store double %result, double addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}round_v2f64:
+; SI: s_endpgm
+define void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 {
+ %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}round_v4f64:
+; SI: s_endpgm
+define void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 {
+ %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}round_v8f64:
+; SI: s_endpgm
+define void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
+ %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
+ store <8 x double> %result, <8 x double> addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+declare double @llvm.round.f64(double) #1
+declare <2 x double> @llvm.round.v2f64(<2 x double>) #1
+declare <4 x double> @llvm.round.v4f64(<4 x double>) #1
+declare <8 x double> @llvm.round.v8f64(<8 x double>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/R600/llvm.round.ll b/llvm/test/CodeGen/R600/llvm.round.ll
index bedf4ba72ae..109f4c76651 100644
--- a/llvm/test/CodeGen/R600/llvm.round.ll
+++ b/llvm/test/CodeGen/R600/llvm.round.ll
@@ -1,17 +1,27 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
-
-; FUNC-LABEL: {{^}}f32:
-; R600: FRACT {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
-; R600-DAG: ADD {{.*}}, -0.5
-; R600-DAG: CEIL {{.*}} [[ARG]]
-; R600-DAG: FLOOR {{.*}} [[ARG]]
-; R600-DAG: CNDGE
-; R600-DAG: CNDGT
-; R600: CNDGE {{[^,]+}}, [[ARG]]
-define void @f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.round.f32(float %in)
- store float %0, float addrspace(1)* %out
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}round_f32:
+; SI-DAG: s_load_dword [[SX:s[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
+; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
+; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
+; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
+; SI: v_cmp_ge_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SUB]]|, 0.5
+; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
+; SI: buffer_store_dword [[RESULT]]
+
+; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
+; R600-DAG: ADD {{.*}},
+; R600-DAG: BFI_INT
+; R600-DAG: SETGE
+; R600-DAG: CNDE
+; R600-DAG: ADD
+define void @round_f32(float addrspace(1)* %out, float %x) #0 {
+ %result = call float @llvm.round.f32(float %x) #1
+ store float %result, float addrspace(1)* %out
ret void
}
@@ -20,24 +30,37 @@ entry:
; a test for the scalar case, so the vector tests just check that the
; compiler doesn't crash.
-; FUNC-LABEL: v2f32
+; FUNC-LABEL: {{^}}round_v2f32:
+; SI: s_endpgm
; R600: CF_END
-define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
+ %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
+ store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: v4f32
+; FUNC-LABEL: {{^}}round_v4f32:
+; SI: s_endpgm
; R600: CF_END
-define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
+ %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
-declare float @llvm.round.f32(float)
-declare <2 x float> @llvm.round.v2f32(<2 x float>)
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
+; FUNC-LABEL: {{^}}round_v8f32:
+; SI: s_endpgm
+; R600: CF_END
+define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
+ %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
+ store <8 x float> %result, <8 x float> addrspace(1)* %out
+ ret void
+}
+
+declare float @llvm.round.f32(float) #1
+declare <2 x float> @llvm.round.v2f32(<2 x float>) #1
+declare <4 x float> @llvm.round.v4f32(<4 x float>) #1
+declare <8 x float> @llvm.round.v8f32(<8 x float>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
OpenPOWER on IntegriCloud