diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-19 23:16:53 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-19 23:16:53 +0000 |
| commit | a1fe17c9adb2b6093f1ce848a48fb8954c27c595 (patch) | |
| tree | 2fcb8b6fcd3f50a1c72634d2808ad3fdc7206d90 /llvm/test/CodeGen/AMDGPU | |
| parent | 1986030b62601d8cd6d74cfc083e4638be3d8b46 (diff) | |
| download | bcm5719-llvm-a1fe17c9adb2b6093f1ce848a48fb8954c27c595.tar.gz bcm5719-llvm-a1fe17c9adb2b6093f1ce848a48fb8954c27c595.zip | |
AMDGPU: Change fdiv lowering based on !fpmath metadata
If 2.5 ulp is acceptable, denormals are not required, and the
division isn't a reciprocal (which will already be handled),
replace it with a faster fdiv.
Simplify the lowering tests by using per-function
subtarget features.
llvm-svn: 276051
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll | 242 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fdiv.ll | 251 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll | 18 |
3 files changed, 366 insertions, 145 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll index a12132f425d..c9ae39ddaa3 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare.ll @@ -1,8 +1,242 @@ -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare < %s | FileCheck %s -; RUN: opt -S -amdgpu-codegenprepare < %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s +; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s ; Make sure this doesn't crash with no triple -; CHECK-LABEL: @foo( -define void @foo() { +; NOOP-LABEL: @noop_fdiv_fpmath( +; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0 +define void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #3 { + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out ret void } + +; CHECK-LABEL: @fdiv_fpmath( +; CHECK: %no.md = fdiv float %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 +; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3 +; CHECK: %fast.md.25ulp = call fast float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: arcp.md.25ulp = call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +define void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 { + %no.md = fdiv float %a, %b + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float %a, %b, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %md.1ulp = fdiv float %a, %b, !fpmath !2 + store volatile float %md.1ulp, float addrspace(1)* %out + + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.3ulp = fdiv float %a, %b, !fpmath !3 + 
store volatile float %md.3ulp, float addrspace(1)* %out + + %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 + store volatile float %fast.md.25ulp, float addrspace(1)* %out + + %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 + store volatile float %arcp.md.25ulp, float addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath( +; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}} +; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1 +; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}} +; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0 +; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}} +; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0 +define void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 { + %no.md = fdiv float 1.0, %x + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float 1.0, %x, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %arcp.no.md = fdiv arcp float 1.0, %x + store volatile float %arcp.no.md, float addrspace(1)* %out + + %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0 + store volatile float %arcp.25ulp, float addrspace(1)* %out + + %fast.no.md = fdiv fast float 1.0, %x + store volatile float %fast.no.md, float addrspace(1)* %out + + %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0 + store volatile float %fast.25ulp, float addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @fdiv_fpmath_vector( +; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 + +; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0 +; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0 +; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0 +; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0 +; CHECK: %[[A1:[0-9]+]] 
= extractelement <2 x float> %a, i64 1 +; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1 +; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0 +; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1 +define void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 { + %no.md = fdiv <2 x float> %a, %b + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 + store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out + + %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 + store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out + + %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0 + store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath_vector( +; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} +; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1 +; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} +; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}} + +; CHECK: extractelement <2 x float> %x +; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: extractelement <2 x float> %x +; CHECK: fdiv arcp float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: fdiv fast float 1.000000e+00, %{{[0-9]+}}, !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out +define void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { + %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + 
%md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1 + store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out + + %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out + + %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x + store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out + + %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat( +; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x +; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x +; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}} + +; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 +; CHECK: fdiv arcp float 1.000000e+00, %[[X0]], !fpmath !0 +; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %[[X1]]), !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: %[[X0:[0-9]+]] = extractelement <2 x float> %x, i64 0 +; CHECK: fdiv fast float 1.000000e+00, %[[X0]], !fpmath !0 +; CHECK: %[[X1:[0-9]+]] = extractelement <2 x float> %x, i64 1 +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %[[X1]]), !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp +define void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 { + %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out + + %arcp.no.md = fdiv arcp <2 x float> 
<float 1.0, float 2.0>, %x + store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out + + %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x + store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out + + %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; FIXME: Should be able to get fdiv for 1.0 component +; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant( +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: store volatile <2 x float> %arcp.25ulp + +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: call fast float @llvm.amdgcn.fdiv.fast(float %{{[0-9]+}}, float %{{[0-9]+}}), !fpmath !0 +; CHECK: store volatile <2 x float> %fast.25ulp +define void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 { + %x.insert = insertelement <2 x float> %x, float 1.0, i32 0 + + %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0 + store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out + + %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0 + store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out + + ret void +} + +; CHECK-LABEL: @fdiv_fpmath_f32_denormals( +; CHECK: %no.md = fdiv float %a, %b{{$}} +; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1 +; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2 +; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0 +; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3 +; CHECK: call fast float 
@llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +; CHECK: call arcp float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0 +define void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 { + %no.md = fdiv float %a, %b + store volatile float %no.md, float addrspace(1)* %out + + %md.half.ulp = fdiv float %a, %b, !fpmath !1 + store volatile float %md.half.ulp, float addrspace(1)* %out + + %md.1ulp = fdiv float %a, %b, !fpmath !2 + store volatile float %md.1ulp, float addrspace(1)* %out + + %md.25ulp = fdiv float %a, %b, !fpmath !0 + store volatile float %md.25ulp, float addrspace(1)* %out + + %md.3ulp = fdiv float %a, %b, !fpmath !3 + store volatile float %md.3ulp, float addrspace(1)* %out + + %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 + store volatile float %fast.md.25ulp, float addrspace(1)* %out + + %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 + store volatile float %arcp.md.25ulp, float addrspace(1)* %out + + ret void +} + +attributes #0 = { nounwind optnone noinline } +attributes #1 = { nounwind } +attributes #2 = { nounwind "target-features"="+fp32-denormals" } + +; CHECK: !0 = !{float 2.500000e+00} +; CHECK: !1 = !{float 5.000000e-01} +; CHECK: !2 = !{float 1.000000e+00} +; CHECK: !3 = !{float 3.000000e+00} + +!0 = !{float 2.500000e+00} +!1 = !{float 5.000000e-01} +!2 = !{float 1.000000e+00} +!3 = !{float 3.000000e+00} diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.ll b/llvm/test/CodeGen/AMDGPU/fdiv.ll index 4021233e778..65464cdba60 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv.ll @@ -1,8 +1,4 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: 
llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s ; These tests check that fdiv is expanded correctly and also test that the @@ -15,22 +11,59 @@ ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 +; SI: v_div_scale_f32 +; SI-DAG: v_div_scale_f32 ; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 +; SI: v_fma_f32 +; SI: v_fma_f32 +; SI: v_mul_f32 +; SI: v_fma_f32 +; SI: v_fma_f32 +; SI: v_fma_f32 +; SI: v_div_fmas_f32 +; SI: v_div_fixup_f32 +define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 { +entry: + %fdiv = fdiv float %a, %b + store float %fdiv, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}fdiv_25ulp_f32: +; SI: v_cndmask_b32 +; SI: v_mul_f32 +; SI: v_rcp_f32 +; SI: v_mul_f32 +; SI: v_mul_f32 +define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 { +entry: + %fdiv = fdiv float %a, %b, !fpmath !0 + store float %fdiv, float addrspace(1)* %out + ret void +} + +; Use correct fdiv +; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32: +; SI: v_fma_f32 +; SI: v_div_fmas_f32 +; SI: v_div_fixup_f32 +define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { +entry: + %fdiv = fdiv float %a, %b, !fpmath !0 + store float %fdiv, float addrspace(1)* %out + ret void +} -; I754-DAG: v_div_scale_f32 -; I754-DAG: v_rcp_f32 -; I754-DAG: v_fma_f32 -; I754-DAG: v_mul_f32 -; I754-DAG: v_fma_f32 -; I754-DAG: v_div_fixup_f32 -define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) { +; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32: +; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} +; SI: 
v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { entry: - %0 = fdiv float %a, %b - store float %0, float addrspace(1)* %out + %fdiv = fdiv fast float %a, %b + store float %fdiv, float addrspace(1)* %out ret void } @@ -38,15 +71,14 @@ entry: ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) { +; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} +; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 { entry: - %0 = fdiv fast float %a, %b - store float %0, float addrspace(1)* %out + %fdiv = fdiv fast float %a, %b + store float %fdiv, float addrspace(1)* %out ret void } @@ -54,15 +86,14 @@ entry: ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) { +; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} +; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] +; SI-NOT: [[RESULT]] +; SI: buffer_store_dword [[RESULT]] +define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 { entry: - %0 = fdiv arcp float %a, %b - store float %0, float addrspace(1)* %out + %fdiv = fdiv arcp float %a, %b + store float %fdiv, float addrspace(1)* %out ret void } @@ -72,26 +103,24 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS ; R600-DAG: MUL_IEEE {{\** 
*}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 +; SI: v_div_scale_f32 +; SI: v_div_scale_f32 +; SI: v_div_scale_f32 +; SI: v_div_scale_f32 +define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { +entry: + %fdiv = fdiv <2 x float> %a, %b + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out + ret void +} -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32: +; SI: v_cmp_gt_f32 +; SI: v_cmp_gt_f32 +define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %0 = fdiv <2 x float> %a, %b - store <2 x float> %0, <2 x float> addrspace(1)* %out + %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0 + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -101,19 +130,12 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %0 = fdiv fast <2 x float> %a, %b - store <2 x float> %0, <2 x float> addrspace(1)* %out + %fdiv = fdiv fast <2 x float> %a, %b + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -123,19 +145,12 @@ entry: ; R600-DAG: 
MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_rcp_f32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 { entry: - %0 = fdiv arcp <2 x float> %a, %b - store <2 x float> %0, <2 x float> addrspace(1)* %out + %fdiv = fdiv arcp <2 x float> %a, %b + store <2 x float> %fdiv, <2 x float> addrspace(1)* %out ret void } @@ -149,37 +164,11 @@ entry: ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 - -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_scale_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -; I754: v_div_fixup_f32 -define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +; SI: v_div_fixup_f32 +; SI: v_div_fixup_f32 +; SI: v_div_fixup_f32 +; SI: v_div_fixup_f32 +define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x 
float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr @@ -198,24 +187,11 @@ define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1) ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr @@ -234,24 +210,11 @@ define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> ad ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_rcp_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 -; UNSAFE-FP: v_mul_f32_e32 - -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -; SI-DAG: v_rcp_f32 -; SI-DAG: v_mul_f32 -define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +; SI: v_rcp_f32 +define void 
@fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 { %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 %a = load <4 x float>, <4 x float> addrspace(1) * %in %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr @@ -259,3 +222,9 @@ define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> ad store <4 x float> %result, <4 x float> addrspace(1)* %out ret void } + +attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" } +attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" } +attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" } + +!0 = !{float 2.500000e+00} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll new file mode 100644 index 00000000000..54d7848da3b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +declare float @llvm.amdgcn.fdiv.fast(float, float) #0 + +; CHECK-LABEL: {{^}}test_fdiv_fast: +; CHECK: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc +; CHECK: v_mul_f32_e32 +; CHECK: v_rcp_f32_e32 +; CHECK: v_mul_f32_e32 +; CHECK: v_mul_f32_e32 +define void @test_fdiv_fast(float addrspace(1)* %out, float %a, float %b) #1 { + %fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b) + store float %fdiv, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } |

