diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/R600/fdiv.f64.ll | 96 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/fdiv64.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/frem.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll | 2 |
4 files changed, 105 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/R600/fdiv.f64.ll b/llvm/test/CodeGen/R600/fdiv.f64.ll new file mode 100644 index 00000000000..276642f9901 --- /dev/null +++ b/llvm/test/CodeGen/R600/fdiv.f64.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=COMMON %s + + +; COMMON-LABEL: {{^}}fdiv_f64: +; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 +; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] +; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]] + +; Check for div_scale bug workaround on SI +; SI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] +; SI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[NUM]], [[DEN]], [[NUM]] + +; COMMON-DAG: v_rcp_f64_e32 [[RCP_SCALE0:v\[[0-9]+:[0-9]+\]]], [[SCALE0]] + +; SI-DAG: v_cmp_eq_i32_e32 vcc, {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, {{v[0-9]+}} +; SI-DAG: s_xor_b64 vcc, [[CMP0]], vcc + +; COMMON-DAG: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[RCP_SCALE0]], 1.0 +; COMMON-DAG: v_fma_f64 [[FMA1:v\[[0-9]+:[0-9]+\]]], [[RCP_SCALE0]], [[FMA0]], [[RCP_SCALE0]] +; COMMON-DAG: v_fma_f64 [[FMA2:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[FMA1]], 1.0 +; COMMON-DAG: v_fma_f64 [[FMA3:v\[[0-9]+:[0-9]+\]]], [[FMA1]], [[FMA2]], [[FMA1]] +; COMMON-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[SCALE1]], [[FMA3]] +; COMMON-DAG: v_fma_f64 [[FMA4:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[MUL]], [[SCALE1]] +; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA3]], [[FMA4]], [[MUL]] +; COMMON: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]] +; COMMON: buffer_store_dwordx2 [[RESULT]] +; COMMON: s_endpgm +define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind { + %gep.1 = getelementptr double addrspace(1)* %in, i32 1 + %num = load double addrspace(1)* %in + %den = load double addrspace(1)* %gep.1 + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_s_v: +define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind { + %den = load double addrspace(1)* %in + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_v_s: +define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind { + %num = load double addrspace(1)* %in + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}fdiv_f64_s_s: +define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) nounwind { + %result = fdiv double %num, %den + store double %result, double addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}v_fdiv_v2f64: +define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind { + %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1 + %num = load <2 x double> addrspace(1)* %in + %den = load <2 x double> addrspace(1)* %gep.1 + %result = fdiv <2 x double> %num, %den + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}s_fdiv_v2f64: +define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2 x double> %den) { + %result = fdiv <2 x double> %num, %den + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}v_fdiv_v4f64: +define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind { + %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1 + %num = load <4 x double> addrspace(1)* %in + %den = load <4 x double> addrspace(1)* %gep.1 + %result = fdiv <4 x double> %num, %den + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; COMMON-LABEL: {{^}}s_fdiv_v4f64: +define void @s_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %num, <4 x double> %den) { + %result = fdiv <4 x double> %num, %den + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/R600/fdiv64.ll b/llvm/test/CodeGen/R600/fdiv64.ll deleted file mode 100644 index c081ccf50ca..00000000000 --- a/llvm/test/CodeGen/R600/fdiv64.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s - -; CHECK: {{^}}fdiv_f64: -; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}} -; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} - -define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1, - double addrspace(1)* %in2) { - %r0 = load double addrspace(1)* %in1 - %r1 = load double addrspace(1)* %in2 - %r2 = fdiv double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void -} diff --git a/llvm/test/CodeGen/R600/frem.ll b/llvm/test/CodeGen/R600/frem.ll index b1a51a41532..02a00704cf0 100644 --- a/llvm/test/CodeGen/R600/frem.ll +++ b/llvm/test/CodeGen/R600/frem.ll @@ -42,18 +42,16 @@ define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ret void } - ; FUNC-LABEL: {{^}}frem_f64: ; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 ; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0 -; TODO: Check SI. -; CI: v_rcp_f64_e32 [[INVY:v\[[0-9]+:[0-9]+\]]], [[Y]] -; CI: v_mul_f64 [[DIV:v\[[0-9]+:[0-9]+\]]], [[X]], [[INVY]] -; CI: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[DIV]] -; CI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], [[TRUNC]], [[Y]] -; SI: v_mul_f64 [[RESULTM:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, [[Y]] -; GCN: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], -[[RESULTM]] -; GCN: buffer_store_dwordx2 [[RESULT]], {{.*}}, 0 +; GCN-DAG: v_div_fmas_f64 +; GCN-DAG: v_div_scale_f64 +; GCN-DAG: v_mul_f64 +; CI: v_trunc_f64_e32 +; CI: v_mul_f64 +; GCN: v_add_f64 +; GCN: buffer_store_dwordx2 ; GCN: s_endpgm define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll index b80658b5ed4..d2a655bf909 100644 --- a/llvm/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll +++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll @@ -23,6 +23,8 @@ define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { ; FUNC-LABEL: {{^}}rsq_rcp_pat_f64: ; SI-UNSAFE: v_rsq_f64_e32 ; SI-SAFE-NOT: v_rsq_f64_e32 +; SI-SAFE: v_sqrt_f64 +; SI-SAFE: v_rcp_f64 define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone |