[InstCombine] limit nsz: -(X - Y) --> Y - X to hasOneUse()

As noted in the post-commit discussion for r329350, we shouldn't generally assume that an fsub has the same cost as an fneg.

llvm-svn: 329429
author: Sanjay Patel <spatel@rotateright.com> 2018-04-06 17:24:08 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-04-06 17:24:08 +0000
commit: a9ca709011d0621afa54fee11de39502db4b261b (patch)
tree: 80ebb921be9e1373c86f38640e16fb3d42d51fcc
parent: e0c2c49a15fb9e1f5a424c9269ab820038c480eb (diff)
download: bcm5719-llvm-a9ca709011d0621afa54fee11de39502db4b261b.tar.gz
bcm5719-llvm-a9ca709011d0621afa54fee11de39502db4b261b.zip
2 files changed, 16 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index fac3ea86359..5efdc546472 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1698,21 +1698,18 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
                                   SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
 
-  Value *X, *Y;
-  if (I.hasNoSignedZeros()) {
-    // Subtraction from -0.0 is the canonical form of fneg.
-    // fsub nsz 0, X ==> fsub nsz -0.0, X
-    if (match(Op0, m_PosZeroFP()))
-      return BinaryOperator::CreateFNegFMF(Op1, &I);
-
-    // With no-signed-zeros: -(X - Y) --> Y - X
-    if (match(Op0, m_NegZeroFP()) && match(Op1, m_FSub(m_Value(X), m_Value(Y))))
-      return BinaryOperator::CreateFSubFMF(Y, X, &I);
-  }
+  // Subtraction from -0.0 is the canonical form of fneg.
+  // fsub nsz 0, X ==> fsub nsz -0.0, X
+  if (I.hasNoSignedZeros() && match(Op0, m_PosZeroFP()))
+    return BinaryOperator::CreateFNegFMF(Op1, &I);
 
-  // More generally than above, if Op0 is not -0.0: Z - (X - Y) --> Z + (Y - X)
+  // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
   // Canonicalize to fadd to make analysis easier.
   // This can also help codegen because fadd is commutative.
+  // Note that if this fsub was really an fneg, the fadd with -0.0 will get
+  // killed later. We still limit that particular transform with 'hasOneUse'
+  // because an fneg is assumed better/cheaper than a generic fsub.
+  Value *X, *Y;
   if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) {
     if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
       Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
diff --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll
index a209dee88fd..d3b1fe3d782 100644
--- a/llvm/test/Transforms/InstCombine/fsub.ll
+++ b/llvm/test/Transforms/InstCombine/fsub.ll
@@ -19,20 +19,24 @@ define float @test1(float %x, float %y) {
 
 define float @neg_sub_nsz(float %x, float %y) {
 ; CHECK-LABEL: @neg_sub_nsz(
-; CHECK-NEXT:    [[T2:%.*]] = fsub nsz float [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    ret float [[T2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub nsz float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %t1 = fsub float %x, %y
   %t2 = fsub nsz float -0.0, %t1
   ret float %t2
 }
 
+; If the subtract has another use, we don't do the transform (even though it
+; doesn't increase the IR instruction count) because we assume that fneg is 
+; easier to analyze and generally cheaper than generic fsub.
+
 declare void @use(float)
 
 define float @neg_sub_nsz_extra_use(float %x, float %y) {
 ; CHECK-LABEL: @neg_sub_nsz_extra_use(
 ; CHECK-NEXT:    [[T1:%.*]] = fsub float [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = fsub nsz float [[Y]], [[X]]
+; CHECK-NEXT:    [[T2:%.*]] = fsub nsz float -0.000000e+00, [[T1]]
 ; CHECK-NEXT:    call void @use(float [[T1]])
 ; CHECK-NEXT:    ret float [[T2]]
 ;
author	Sanjay Patel <spatel@rotateright.com>	2018-04-06 17:24:08 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-04-06 17:24:08 +0000
commit	a9ca709011d0621afa54fee11de39502db4b261b (patch)
tree	80ebb921be9e1373c86f38640e16fb3d42d51fcc
parent	e0c2c49a15fb9e1f5a424c9269ab820038c480eb (diff)
download	bcm5719-llvm-a9ca709011d0621afa54fee11de39502db4b261b.tar.gz bcm5719-llvm-a9ca709011d0621afa54fee11de39502db4b261b.zip