5 files changed, 121 insertions, 36 deletions
diff --git a/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
index 981e5c26ee7..0fa0cf0c7bd 100644
--- a/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ b/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -16,11 +16,11 @@
 #define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
 namespace llvm {
-class BlockFrequencyInfo;
 class DebugLoc;
 class Function;
 class LLVMContext;
@@ -34,6 +34,19 @@ public:
   OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
       : F(F), BFI(BFI) {}
 
+  /// \brief This variant can be used to generate ORE on demand (without the
+  /// analysis pass).
+  ///
+  /// Note that this ctor has a very different cost depending on whether
+  /// F->getContext().getDiagnosticHotnessRequested() is on or not.  If it's
+  /// off, the operation is free.  If it's on, it is a fairly expensive
+  /// operation since BFI and all its required analyses are computed.  This is
+  /// for example useful for CGSCC passes that can't use function analysis
+  /// passes in the old PM.
+  OptimizationRemarkEmitter(Function *F);
+
+  /// Move ctor.  Also takes over an on-demand BFI (OwnedBFI) so that the BFI
+  /// pointer copied from \p Arg does not dangle once Arg is destroyed.
   OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
-      : F(Arg.F), BFI(Arg.BFI) {}
+      : F(Arg.F), BFI(Arg.BFI), OwnedBFI(std::move(Arg.OwnedBFI)) {}
 
@@ -149,6 +162,9 @@ private:
 
   BlockFrequencyInfo *BFI;
 
+  /// If we generate BFI on demand, we need to free it when ORE is freed.
+  std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
+
   Optional<uint64_t> computeHotness(const Value *V);
 
   OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;
diff --git a/llvm/include/llvm/Transforms/IPO/InlinerPass.h b/llvm/include/llvm/Transforms/IPO/InlinerPass.h
index 80410373afa..de5f5d84579 100644
--- a/llvm/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/llvm/include/llvm/Transforms/IPO/InlinerPass.h
@@ -27,6 +27,7 @@ class AssumptionCacheTracker;
 class CallSite;
 class DataLayout;
 class InlineCost;
+class OptimizationRemarkEmitter;
 class ProfileSummaryInfo;
 template <class PtrType, unsigned SmallSize> class SmallPtrSet;
 
diff --git a/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
index 5256437da9d..e150d9d2d82 100644
--- a/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,13 +13,37 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/LLVMContext.h"
 
 using namespace llvm;
 
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+    : F(F), BFI(nullptr) {
+  if (!F->getContext().getDiagnosticHotnessRequested())
+    return;
+
+  // Hotness was requested: build a dominator tree for F first.
+  DominatorTree DT;
+  DT.recalculate(*F);
+
+  // Generate LoopInfo from the dominator tree.
+  LoopInfo LI;
+  LI.analyze(DT);
+
+  // Then compute BranchProbabilityInfo from F and LI.
+  BranchProbabilityInfo BPI;
+  BPI.calculate(*F, LI);
+
+  // Finally compute BFI.  NOTE(review): confirm it keeps no refs to BPI/LI.
+  OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+  BFI = OwnedBFI.get();
+}
+
 Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
   if (!BFI)
     return None;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index d676c09222a..c814b01750d 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -237,11 +238,9 @@ static bool InlineCallIfPossible(
   return true;
 }
 
-static void emitAnalysis(CallSite CS, const Twine &Msg) {
-  Function *Caller = CS.getCaller();
-  LLVMContext &Ctx = Caller->getContext();
-  DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
-  emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+static void emitAnalysis(CallSite CS, OptimizationRemarkEmitter &ORE,
+                         const Twine &Msg) {
+  ORE.emitOptimizationRemarkAnalysis(DEBUG_TYPE, CS.getInstruction(), Msg);
 }
 
 /// Return true if inlining of CS can block the caller from being
@@ -323,22 +322,23 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
 
 /// Return true if the inliner should attempt to inline at the given CallSite.
 static bool shouldInline(CallSite CS,
-                         function_ref<InlineCost(CallSite CS)> GetInlineCost) {
+                         function_ref<InlineCost(CallSite CS)> GetInlineCost,
+                         OptimizationRemarkEmitter &ORE) {
   InlineCost IC = GetInlineCost(CS);
 
   if (IC.isAlways()) {
     DEBUG(dbgs() << "    Inlining: cost=always"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
-                         " should always be inlined (cost=always)");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName()) +
+                              " should always be inlined (cost=always)");
     return true;
   }
 
   if (IC.isNever()) {
     DEBUG(dbgs() << "    NOT Inlining: cost=never"
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " should never be inlined (cost=never)"));
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " should never be inlined (cost=never)"));
     return false;
   }
 
@@ -347,10 +347,10 @@ static bool shouldInline(CallSite CS,
     DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
                  << ", thres=" << (IC.getCostDelta() + IC.getCost())
                  << ", Call: " << *CS.getInstruction() << "\n");
-    emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
-                           " too costly to inline (cost=") +
-                         Twine(IC.getCost()) + ", threshold=" +
-                         Twine(IC.getCostDelta() + IC.getCost()) + ")");
+    emitAnalysis(CS, ORE, Twine(CS.getCalledFunction()->getName() +
+                                " too costly to inline (cost=") +
+                              Twine(IC.getCost()) + ", threshold=" +
+                              Twine(IC.getCostDelta() + IC.getCost()) + ")");
     return false;
   }
 
@@ -359,20 +359,22 @@ static bool shouldInline(CallSite CS,
     DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                  << " Cost = " << IC.getCost()
                  << ", outer Cost = " << TotalSecondaryCost << '\n');
-    emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
-                           CS.getCalledFunction()->getName() +
-                           " increases the cost of inlining " +
-                           CS.getCaller()->getName() + " in other contexts"));
+    emitAnalysis(CS, ORE,
+                 Twine("Not inlining. Cost of inlining " +
+                       CS.getCalledFunction()->getName() +
+                       " increases the cost of inlining " +
+                       CS.getCaller()->getName() + " in other contexts"));
     return false;
   }
 
   DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
                << ", thres=" << (IC.getCostDelta() + IC.getCost())
                << ", Call: " << *CS.getInstruction() << '\n');
-  emitAnalysis(
-      CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
-              CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
-              " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+  emitAnalysis(CS, ORE, CS.getCalledFunction()->getName() +
+                            Twine(" can be inlined into ") +
+                            CS.getCaller()->getName() + " with cost=" +
+                            Twine(IC.getCost()) + " (threshold=" +
+                            Twine(IC.getCostDelta() + IC.getCost()) + ")");
   return true;
 }
 
@@ -513,18 +515,21 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
             InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
           continue;
 
-        LLVMContext &CallerCtx = Caller->getContext();
-
         // Get DebugLoc to report. CS will be invalid after Inliner.
         DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+        BasicBlock *Block = CS.getParent();
+        // FIXME for new PM: because of the old PM we currently generate ORE and
+        // in turn BFI on demand.  With the new PM, the ORE dependency should
+        // just become a regular analysis dependency.
+        OptimizationRemarkEmitter ORE(Caller);
 
         // If the policy determines that we should inline this function,
         // try to do so.
-        if (!shouldInline(CS, GetInlineCost)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+        if (!shouldInline(CS, GetInlineCost, ORE)) {
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
 
@@ -532,17 +537,17 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
         if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
                                   InlineHistoryID, InsertLifetime, AARGetter,
                                   ImportedFunctionsStats)) {
-          emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
-                                       Twine(Callee->getName() +
-                                             " will not be inlined into " +
-                                             Caller->getName()));
+          ORE.emitOptimizationRemarkMissed(DEBUG_TYPE, DLoc, Block,
+                                           Twine(Callee->getName() +
+                                                 " will not be inlined into " +
+                                                 Caller->getName()));
           continue;
         }
         ++NumInlined;
 
         // Report the inline decision.
-        emitOptimizationRemark(
-            CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+        ORE.emitOptimizationRemark(
+            DEBUG_TYPE, DLoc, Block,
             Twine(Callee->getName() + " inlined into " + Caller->getName()));
 
         // If inlining this function gave us any new call sites, throw them
diff --git a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
new file mode 100644
index 00000000000..9611a2dd1bd
--- /dev/null
+++ b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \
+; RUN:     -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \
+; RUN:     | FileCheck %s
+
+; CHECK: foo should always be inlined (cost=always) (hotness: 30)
+; CHECK: foo inlined into bar (hotness: 30)
+; CHECK: foz should never be inlined (cost=never) (hotness: 30)
+; CHECK: foz will not be inlined into bar (hotness: 30)
+
+; Function Attrs: alwaysinline
+define i32 @foo() #0 !prof !1 {
+entry:
+  ret i32 4
+}
+
+; Function Attrs: noinline
+define i32 @foz() #1 !prof !2 {
+entry:
+  ret i32 2
+}
+
+; Caller under test (no attribute group): calls both @foo and @foz.
+define i32 @bar() !prof !3 {
+entry:
+  %call = call i32 @foo()
+  %call2 = call i32 @foz()
+  %mul = mul i32 %call, %call2
+  ret i32 %mul
+}
+
+attributes #0 = { alwaysinline }
+attributes #1 = { noinline }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5.0 "}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 20}
+!3 = !{!"function_entry_count", i64 30}