summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitry Venikov <quolyk@gmail.com>2018-01-02 05:58:11 +0000
committerDmitry Venikov <quolyk@gmail.com>2018-01-02 05:58:11 +0000
commita58d8deb3a9cde087c770cf18ca31a1b09302040 (patch)
tree8336bf7e9d4a30178981595666b4d6d589b19a9d
parentd2257be8b76b5136a5a4c86d8e16e5f2f7f165c9 (diff)
downloadbcm5719-llvm-a58d8deb3a9cde087c770cf18ca31a1b09302040.tar.gz
bcm5719-llvm-a58d8deb3a9cde087c770cf18ca31a1b09302040.zip
[InstCombine] Missed optimization in math expression: squashing sqrt functions
Summary: This patch enables folding sqrt(a) * sqrt(b) -> sqrt(a*b) when the -ffast-math flag is in effect. Reviewers: hfinkel, spatel, davide Reviewed By: spatel, davide Subscribers: davide, llvm-commits Differential Revision: https://reviews.llvm.org/D41322 llvm-svn: 321637
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp17
-rw-r--r--llvm/test/Transforms/InstCombine/fmul-sqrt.ll67
2 files changed, 84 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 541dde6c47d..38604830b88 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -728,6 +728,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}
+ // sqrt(a) * sqrt(b) -> sqrt(a * b)
+ if (AllowReassociate &&
+ Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Opnd0 = nullptr;
+ Value *Opnd1 = nullptr;
+ if (match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(Opnd0))) &&
+ match(Op1, m_Intrinsic<Intrinsic::sqrt>(m_Value(Opnd1)))) {
+ BuilderTy::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(I.getFastMathFlags());
+ Value *FMulVal = Builder.CreateFMul(Opnd0, Opnd1);
+ Value *Sqrt = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::sqrt, I.getType());
+ Value *SqrtCall = Builder.CreateCall(Sqrt, FMulVal);
+ return replaceInstUsesWith(I, SqrtCall);
+ }
+ }
+
// Handle symmetric situation in a 2-iteration loop
Value *Opnd0 = Op0;
Value *Opnd1 = Op1;
diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
new file mode 100644
index 00000000000..0031a61059a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare double @llvm.sqrt.f64(double) nounwind readnone speculatable
+declare void @use(double)
+
+; sqrt(a) * sqrt(b) no math flags
+define double @sqrt_a_sqrt_b(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b(
+; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call double @llvm.sqrt.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call double @llvm.sqrt.f64(double %a)
+ %2 = call double @llvm.sqrt.f64(double %b)
+ %mul = fmul double %1, %2
+ ret double %mul
+}
+
+; sqrt(a) * sqrt(b) fast-math, multiple uses
+define double @sqrt_a_sqrt_b_multiple_uses(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_multiple_uses(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast double @llvm.sqrt.f64(double [[A:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.sqrt.f64(double [[B:%.*]])
+; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: call void @use(double [[TMP2]])
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %1 = call fast double @llvm.sqrt.f64(double %a)
+ %2 = call fast double @llvm.sqrt.f64(double %b)
+ %mul = fmul fast double %1, %2
+ call void @use(double %2)
+ ret double %mul
+}
+
+; sqrt(a) * sqrt(b) => sqrt(a*b) with fast-math
+define double @sqrt_a_sqrt_b_fast(double %a, double %b) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP1]])
+; CHECK-NEXT: ret double [[TMP2]]
+;
+ %1 = call fast double @llvm.sqrt.f64(double %a)
+ %2 = call fast double @llvm.sqrt.f64(double %b)
+ %mul = fmul fast double %1, %2
+ ret double %mul
+}
+
+; sqrt(a) * sqrt(b) * sqrt(c) * sqrt(d) => sqrt(a*b*c*d) with fast-math
+define double @sqrt_a_sqrt_b_sqrt_c_sqrt_d_fast(double %a, double %b, double %c, double %d) {
+; CHECK-LABEL: @sqrt_a_sqrt_b_sqrt_c_sqrt_d_fast(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[TMP1]], [[C:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[D:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]])
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = call fast double @llvm.sqrt.f64(double %a)
+ %2 = call fast double @llvm.sqrt.f64(double %b)
+ %mul = fmul fast double %1, %2
+ %3 = call fast double @llvm.sqrt.f64(double %c)
+ %mul1 = fmul fast double %mul, %3
+ %4 = call fast double @llvm.sqrt.f64(double %d)
+ %mul2 = fmul fast double %mul1, %4
+ ret double %mul2
+}
OpenPOWER on IntegriCloud