summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp9
-rw-r--r--llvm/test/CodeGen/PowerPC/fmf-propagation.ll52
-rw-r--r--llvm/test/CodeGen/X86/fmf-flags.ll15
-rw-r--r--llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll12
4 files changed, 64 insertions, 24 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 50953275d33..6d59f330048 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10893,17 +10893,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath)
+ SDNodeFlags Flags = N->getFlags();
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
+ !Flags.hasApproximateFuncs())
return SDValue();
SDValue N0 = N->getOperand(0);
if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
- // TODO: FSQRT nodes should have flags that propagate to the created nodes.
- // For now, create a Flags object for use with reassociation math transforms.
- SDNodeFlags Flags;
- Flags.setAllowReassociation(true);
+ // FSQRT nodes have flags that propagate to the created nodes.
return buildSqrtEstimate(N0, Flags);
}
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 4c87c3dfbbd..e1a7019fc72 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -300,18 +300,34 @@ define float @fmul_fma_fast2(float %x) {
; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; FMFDEBUG: fsqrt afn {{t[0-9]+}}
+; FMFDEBUG: fmul afn {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul afn {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:'
define float @sqrt_afn(float %x) {
; FMF-LABEL: sqrt_afn:
-; FMF: # %bb.0:
-; FMF-NEXT: xssqrtsp 1, 1
-; FMF-NEXT: blr
+; FMF: # %bb.0:
+; FMF-NEXT: xxlxor 0, 0, 0
+; FMF-NEXT: fcmpu 0, 1, 0
+; FMF-NEXT: beq 0, .LBB10_2
+; FMF-NEXT: # %bb.1:
+; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; FMF-NEXT: xsrsqrtesp 3, 1
+; FMF-NEXT: addi 3, 3, .LCPI10_0@toc@l
+; FMF-NEXT: lfsx 0, 0, 3
+; FMF-NEXT: xsmulsp 2, 1, 0
+; FMF-NEXT: xsmulsp 4, 3, 3
+; FMF-NEXT: xssubsp 2, 2, 1
+; FMF-NEXT: xsmulsp 2, 2, 4
+; FMF-NEXT: xssubsp 0, 0, 2
+; FMF-NEXT: xsmulsp 0, 3, 0
+; FMF-NEXT: xsmulsp 0, 0, 1
+; FMF-NEXT: .LBB10_2:
+; FMF-NEXT: fmr 1, 0
+; FMF-NEXT: blr
;
; GLOBAL-LABEL: sqrt_afn:
; GLOBAL: # %bb.0:
@@ -340,18 +356,34 @@ define float @sqrt_afn(float %x) {
; The call is now fully 'fast'. This implies that approximation is allowed.
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
+; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
-; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
+; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:'
define float @sqrt_fast(float %x) {
; FMF-LABEL: sqrt_fast:
-; FMF: # %bb.0:
-; FMF-NEXT: xssqrtsp 1, 1
-; FMF-NEXT: blr
+; FMF: # %bb.0:
+; FMF-NEXT: xxlxor 0, 0, 0
+; FMF-NEXT: fcmpu 0, 1, 0
+; FMF-NEXT: beq 0, .LBB11_2
+; FMF-NEXT: # %bb.1:
+; FMF-NEXT: xsrsqrtesp 2, 1
+; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; FMF-NEXT: fneg 0, 1
+; FMF-NEXT: fmr 4, 1
+; FMF-NEXT: addi 3, 3, .LCPI11_0@toc@l
+; FMF-NEXT: lfsx 3, 0, 3
+; FMF-NEXT: xsmaddasp 4, 0, 3
+; FMF-NEXT: xsmulsp 0, 2, 2
+; FMF-NEXT: xsmaddasp 3, 4, 0
+; FMF-NEXT: xsmulsp 0, 2, 3
+; FMF-NEXT: xsmulsp 0, 0, 1
+; FMF-NEXT: .LBB11_2:
+; FMF-NEXT: fmr 1, 0
+; FMF-NEXT: blr
;
; GLOBAL-LABEL: sqrt_fast:
; GLOBAL: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/fmf-flags.ll b/llvm/test/CodeGen/X86/fmf-flags.ll
index ca520b3d675..81a34b510e0 100644
--- a/llvm/test/CodeGen/X86/fmf-flags.ll
+++ b/llvm/test/CodeGen/X86/fmf-flags.ll
@@ -7,9 +7,18 @@ declare float @llvm.sqrt.f32(float %x);
define float @fast_recip_sqrt(float %x) {
; X64-LABEL: fast_recip_sqrt:
; X64: # %bb.0:
-; X64-NEXT: sqrtss %xmm0, %xmm1
-; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: divss %xmm1, %xmm0
+; X64-NEXT: rsqrtss %xmm0, %xmm1
+; X64-NEXT: xorps %xmm2, %xmm2
+; X64-NEXT: cmpeqss %xmm0, %xmm2
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: movss {{.*}}(%rip), %xmm3
+; X64-NEXT: mulss %xmm0, %xmm3
+; X64-NEXT: mulss %xmm1, %xmm0
+; X64-NEXT: addss {{.*}}(%rip), %xmm0
+; X64-NEXT: mulss %xmm3, %xmm0
+; X64-NEXT: andnps %xmm0, %xmm2
+; X64-NEXT: movss {{.*}}(%rip), %xmm0
+; X64-NEXT: divss %xmm2, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: fast_recip_sqrt:
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 53dc964aca3..33e44731557 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -7,16 +7,16 @@ define float @foo(float %f) #0 {
; CHECK: body:
; CHECK: %0:fr32 = COPY $xmm0
; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0
-; CHECK: %3:fr32 = reassoc VMULSSrr %0, %1
+; CHECK: %3:fr32 = VMULSSrr %0, %1
; CHECK: %4:fr32 = VMOVSSrm
; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4
; CHECK: %6:fr32 = VMOVSSrm
-; CHECK: %7:fr32 = reassoc VMULSSrr %1, %6
-; CHECK: %8:fr32 = reassoc VMULSSrr killed %7, killed %5
-; CHECK: %9:fr32 = reassoc VMULSSrr %0, %8
+; CHECK: %7:fr32 = VMULSSrr %1, %6
+; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5
+; CHECK: %9:fr32 = VMULSSrr %0, %8
; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4
-; CHECK: %11:fr32 = reassoc VMULSSrr %9, %6
-; CHECK: %12:fr32 = reassoc VMULSSrr killed %11, killed %10
+; CHECK: %11:fr32 = VMULSSrr %9, %6
+; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10
; CHECK: %14:fr32 = FsFLD0SS
; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0
; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13
OpenPOWER on IntegriCloud