[InstCombine] allow fdiv folds with less than fully 'fast' ops

Note: gcc appears to allow this fold with -freciprocal-math alone, but clang/llvm require more than that with this patch. The wording in the definitions seems fuzzy enough that it could go either way, but we'll err on the conservative side of FMF interpretation. This patch also changes the newly created fmul to have FMF propagated by the last fdiv rather than intersecting the FMF of the fdivs. This matches the behavior of other folds near here. The new fmul is only used to produce an intermediate op for the final fdiv result, so it shouldn't be any stricter than that result. The previous behavior could result in dropping FMF via other folds in instcombine or CSE. Differential Revision: https://reviews.llvm.org/D43398 llvm-svn: 326098
author: Sanjay Patel <spatel@rotateright.com> 2018-02-26 16:02:45 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-02-26 16:02:45 +0000
commit: 31a90468e1e17e7acbd8e0ffff42cd1354550799 (patch)
tree: a59ad33d0e068a8432724f204ea64758c8168f56
parent: 2f0aab92097cd068d6c505a268c93e2fa39276de (diff)
download: bcm5719-llvm-31a90468e1e17e7acbd8e0ffff42cd1354550799.tar.gz
bcm5719-llvm-31a90468e1e17e7acbd8e0ffff42cd1354550799.zip
2 files changed, 11 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 412af4ba13b..815af57fc7a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1347,28 +1347,18 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
       if (Instruction *R = FoldOpIntoSelect(I, SI))
         return R;
 
-  if (I.isFast()) {
+  if (I.hasAllowReassoc() && I.hasAllowReciprocal()) {
     Value *X, *Y;
     if (match(Op0, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
         (!isa<Constant>(Y) || !isa<Constant>(Op1))) {
       // (X / Y) / Z => X / (Y * Z)
-      Value *YZ = Builder.CreateFMul(Y, Op1);
-      if (auto *YZInst = dyn_cast<Instruction>(YZ)) {
-        FastMathFlags FMFIntersect = I.getFastMathFlags();
-        FMFIntersect &= cast<Instruction>(Op0)->getFastMathFlags();
-        YZInst->setFastMathFlags(FMFIntersect);
-      }
+      Value *YZ = Builder.CreateFMulFMF(Y, Op1, &I);
       return BinaryOperator::CreateFDivFMF(X, YZ, &I);
     }
     if (match(Op1, m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))) &&
         (!isa<Constant>(Y) || !isa<Constant>(Op0))) {
       // Z / (X / Y) => (Y * Z) / X
-      Value *YZ = Builder.CreateFMul(Y, Op0);
-      if (auto *YZInst = dyn_cast<Instruction>(YZ)) {
-        FastMathFlags FMFIntersect = I.getFastMathFlags();
-        FMFIntersect &= cast<Instruction>(Op1)->getFastMathFlags();
-        YZInst->setFastMathFlags(FMFIntersect);
-      }
+      Value *YZ = Builder.CreateFMulFMF(Y, Op0, &I);
       return BinaryOperator::CreateFDivFMF(YZ, X, &I);
     }
   }
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 8009a89f10d..8073371c16b 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -126,12 +126,12 @@ define <2 x float> @not_exact_inverse_vec_arcp(<2 x float> %x) {
 
 define float @div_with_div_numerator(float %x, float %y, float %z) {
 ; CHECK-LABEL: @div_with_div_numerator(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul ninf float [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[DIV2:%.*]] = fdiv fast float [[X:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc arcp float [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[DIV2:%.*]] = fdiv reassoc arcp float [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret float [[DIV2]]
 ;
   %div1 = fdiv ninf float %x, %y
-  %div2 = fdiv fast float %div1, %z
+  %div2 = fdiv arcp reassoc float %div1, %z
   ret float %div2
 }
 
@@ -139,12 +139,12 @@ define float @div_with_div_numerator(float %x, float %y, float %z) {
 
 define <2 x float> @div_with_div_denominator(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
 ; CHECK-LABEL: @div_with_div_denominator(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul nnan <2 x float> [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[DIV2:%.*]] = fdiv fast <2 x float> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[DIV2:%.*]] = fdiv reassoc arcp <2 x float> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x float> [[DIV2]]
 ;
   %div1 = fdiv nnan <2 x float> %x, %y
-  %div2 = fdiv fast <2 x float> %z, %div1
+  %div2 = fdiv arcp reassoc <2 x float> %z, %div1
   ret <2 x float> %div2
 }
 
@@ -342,8 +342,8 @@ define <2 x float> @div_constant_dividend2_reassoc_only(<2 x float> %x) {
 
 define <2 x float> @div_constant_dividend3(<2 x float> %x) {
 ; CHECK-LABEL: @div_constant_dividend3(
-; CHECK-NEXT:    [[T1:%.*]] = fdiv <2 x float> <float 3.000000e+00, float 7.000000e+00>, [[X:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = fdiv reassoc arcp <2 x float> <float 1.500000e+01, float -7.000000e+00>, [[T1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc arcp <2 x float> [[X:%.*]], <float 1.500000e+01, float -7.000000e+00>
+; CHECK-NEXT:    [[T2:%.*]] = fmul reassoc arcp <2 x float> [[TMP1]], <float 0x3FD5555560000000, float 0x3FC24924A0000000>
 ; CHECK-NEXT:    ret <2 x float> [[T2]]
 ;
   %t1 = fdiv <2 x float> <float 3.0e0, float 7.0e0>, %x
author	Sanjay Patel <spatel@rotateright.com>	2018-02-26 16:02:45 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-02-26 16:02:45 +0000
commit	31a90468e1e17e7acbd8e0ffff42cd1354550799 (patch)
tree	a59ad33d0e068a8432724f204ea64758c8168f56
parent	2f0aab92097cd068d6c505a268c93e2fa39276de (diff)
download	bcm5719-llvm-31a90468e1e17e7acbd8e0ffff42cd1354550799.tar.gz bcm5719-llvm-31a90468e1e17e7acbd8e0ffff42cd1354550799.zip