diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/Analysis/Passes.h | 8 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/ProfileDataLoader.h | 148 | ||||
| -rw-r--r-- | llvm/include/llvm/InitializePasses.h | 1 | ||||
| -rw-r--r-- | llvm/include/llvm/LinkAllPasses.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Analysis/Analysis.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Analysis/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | llvm/lib/Analysis/ProfileDataLoader.cpp | 186 | ||||
| -rw-r--r-- | llvm/lib/Analysis/ProfileDataLoaderPass.cpp | 188 | ||||
| -rw-r--r-- | llvm/lib/Analysis/ProfileInfo.cpp | 26 | ||||
| -rw-r--r-- | llvm/test/Analysis/Profiling/load-branch-weights-ifs.ll | 119 | ||||
| -rw-r--r-- | llvm/test/Analysis/Profiling/load-branch-weights-loops.ll | 185 | ||||
| -rw-r--r-- | llvm/test/Analysis/Profiling/load-branch-weights-switches.ll | 162 | 
12 files changed, 1001 insertions, 26 deletions
diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h index a22bd12dec1..c52f846b5ca 100644 --- a/llvm/include/llvm/Analysis/Passes.h +++ b/llvm/include/llvm/Analysis/Passes.h @@ -103,6 +103,14 @@ namespace llvm {    //===--------------------------------------------------------------------===//    // +  // createProfileMetadataLoaderPass - This pass loads information from a +  // profile dump file and sets branch weight metadata. +  // +  ModulePass *createProfileMetadataLoaderPass(); +  extern char &ProfileMetadataLoaderPassID; + +  //===--------------------------------------------------------------------===// +  //    // createNoProfileInfoPass - This pass implements the default "no profile".    //    ImmutablePass *createNoProfileInfoPass(); diff --git a/llvm/include/llvm/Analysis/ProfileDataLoader.h b/llvm/include/llvm/Analysis/ProfileDataLoader.h new file mode 100644 index 00000000000..3d15bda364d --- /dev/null +++ b/llvm/include/llvm/Analysis/ProfileDataLoader.h @@ -0,0 +1,148 @@ +//===- ProfileDataLoader.h - Load & convert profile info ----*- C++ -*-===// +// +//                      The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load profiling data from a dump file. +// The ProfileDataT<FType, BType> class is used to store the mapping of this +// data to control flow edges. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILEDATALOADER_H +#define LLVM_ANALYSIS_PROFILEDATALOADER_H + +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include <vector> +#include <string> +#include <map> + +namespace llvm { + +class ModulePass; +class Function; +class BasicBlock; + +// Helpers for dumping edges to dbgs(). 
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *,
+                                                  const BasicBlock *> E);
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB);
+raw_ostream& operator<<(raw_ostream &O, const Function *F);
+
+/// \brief The ProfileDataT<FType, BType> class is used to store the mapping of
+/// profiling data to control flow edges.
+///
+/// An edge is defined by its source and sink basic blocks.
+template<class FType, class BType>
+class ProfileDataT {
+public:
+  // The profiling information defines an Edge by its source and sink basic
+  // blocks.
+  typedef std::pair<const BType*, const BType*> Edge;
+
+private:
+  typedef std::map<Edge, unsigned> EdgeWeights;
+  typedef std::map<const FType*, EdgeWeights> FunctionEdgeWeights;
+
+  /// \brief Count the number of times a transition between two blocks is
+  /// executed.
+  ///
+  /// As a special case, we also hold an edge from the null BasicBlock to the
+  /// entry block to indicate how many times the function was entered.
+  FunctionEdgeWeights EdgeInformation;
+
+public:
+  static char ID; // Class identification, replacement for typeinfo
+  ProfileDataT() {}
+  ~ProfileDataT() {}
+
+  /// getFunction() - Returns the Function for an Edge.
+  ///
+  /// The source block (e.first) may be null; the sink block (e.second) must
+  /// not be.
+  static const FType* getFunction(Edge e) {
+    // Check the sink first: the cross-function check below dereferences it.
+    assert(e.second && "A ProfileData::Edge must have a real sink");
+    // e.first may be NULL
+    assert(   ((!e.first) || (e.first->getParent() == e.second->getParent()))
+           && "A ProfileData::Edge can not be between two functions");
+    return e.second->getParent();
+  }
+
+  /// getEdge() - Creates an Edge between two BasicBlocks.
+  static Edge getEdge(const BType *Src, const BType *Dest) {
+    return std::make_pair(Src, Dest);
+  }
+
+  /// getEdgeWeight - Return the number of times that a given edge was
+  /// executed.
+  ///
+  /// Both the edge's function and the edge itself must already have a weight
+  /// recorded through addEdgeWeight(); this is only checked by assertions.
+  unsigned getEdgeWeight(Edge e) const {
+    const FType *f = getFunction(e);
+    typename FunctionEdgeWeights::const_iterator FI = EdgeInformation.find(f);
+    assert(   (FI != EdgeInformation.end())
+           && "No profiling information for function");
+
+    // Bind by reference; copying the whole per-function map on every query
+    // is needless work.
+    const EdgeWeights &weights = FI->second;
+    typename EdgeWeights::const_iterator WI = weights.find(e);
+    assert(   (WI != weights.end())
+           && "No profiling information for edge");
+    return WI->second;
+  }
+
+  /// addEdgeWeight - Add 'weight' to the already stored execution count for
+  /// this edge.  An edge that has no entry yet starts from zero.
+  void addEdgeWeight(Edge e, unsigned weight) {
+    EdgeInformation[getFunction(e)][e] += weight;
+  }
+};
+
+typedef ProfileDataT<Function, BasicBlock> ProfileData;
+//typedef ProfileDataT<MachineFunction, MachineBasicBlock> MachineProfileData;
+
+/// The ProfileDataLoader class is used to load raw profiling data from the
+/// dump file.
+class ProfileDataLoader {
+private:
+  /// The name of the file where the raw profiling data is stored.  Held by
+  /// value so the loader stays valid when it is constructed from a temporary
+  /// string.
+  const std::string Filename;
+
+  /// A vector of the command line arguments used when the target program was
+  /// run to generate profiling data.  One entry per program run.
+  std::vector<std::string> CommandLines;
+
+  /// The raw values for how many times each edge was traversed, values from
+  /// multiple program runs are accumulated.
+  std::vector<unsigned> EdgeCounts;
+
+public:
+  /// ProfileDataLoader ctor - Read the specified profiling data file, exiting
+  /// the program if the file is invalid or broken.
+  ProfileDataLoader(const char *ToolName, const std::string &Filename);
+
+  /// A special value used to represent the weight of an edge which has not
+  /// been counted yet.
+  static const unsigned Uncounted;
+
+  /// The maximum value that can be stored in a profiling counter.
+  static const unsigned MaxCount;
+
+  /// getNumExecutions - Return the number of times the target program was run
+  /// to generate this profiling data.
+  unsigned getNumExecutions() const { return CommandLines.size(); }
+
+  /// getExecution - Return the command line parameters used to generate the
+  /// i'th set of profiling data.  'i' is not range checked.
+  const std::string& getExecution(unsigned i) const { return CommandLines[i]; }
+
+  /// getFileName - Return the name of the profiling data file.
+  const std::string& getFileName() const { return Filename; }
+
+  /// getRawEdgeCounts - Return the raw profiling data, this is just a list of
+  /// numbers with no mappings to edges.
+  const std::vector<unsigned>& getRawEdgeCounts() const { return EdgeCounts; }
+};
+
+/// createProfileMetadataLoaderPass - This function returns a Pass that loads
+/// the profiling information for the module from the specified filename.
+ModulePass *createProfileMetadataLoaderPass(const std::string &Filename);
+
+} // End llvm namespace
+
+#endif
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index de97957a84c..994311334cd 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -141,6 +141,7 @@ void initializeLiveRegMatrixPass(PassRegistry&);
 void initializeLiveStacksPass(PassRegistry&);
 void initializeLiveVariablesPass(PassRegistry&);
 void initializeLoaderPassPass(PassRegistry&);
+void initializeProfileMetadataLoaderPassPass(PassRegistry&);
 void initializePathProfileLoaderPassPass(PassRegistry&);
 void initializeLocalStackSlotPassPass(PassRegistry&);
 void initializeLoopDeletionPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 697c94c094b..fe4c92a295e 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -107,6 +107,7 @@ namespace {
       (void) llvm::createProfileVerifierPass();
       (void) llvm::createPathProfileVerifierPass();
       (void) llvm::createProfileLoaderPass();
+      (void) llvm::createProfileMetadataLoaderPass();
       (void) llvm::createPathProfileLoaderPass();
       (void) 
llvm::createPromoteMemoryToRegisterPass();        (void) llvm::createDemoteRegisterToMemoryPass(); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 0ba6af93b51..87a75fd3b11 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -61,6 +61,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {    initializePathProfileLoaderPassPass(Registry);    initializeProfileVerifierPassPass(Registry);    initializePathProfileVerifierPass(Registry); +  initializeProfileMetadataLoaderPassPass(Registry);    initializeRegionInfoPass(Registry);    initializeRegionViewerPass(Registry);    initializeRegionPrinterPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 96e68b41991..e461848e861 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -44,6 +44,8 @@ add_llvm_library(LLVMAnalysis    ProfileInfoLoader.cpp    ProfileInfoLoaderPass.cpp    ProfileVerifierPass.cpp +  ProfileDataLoader.cpp +  ProfileDataLoaderPass.cpp    RegionInfo.cpp    RegionPass.cpp    RegionPrinter.cpp diff --git a/llvm/lib/Analysis/ProfileDataLoader.cpp b/llvm/lib/Analysis/ProfileDataLoader.cpp new file mode 100644 index 00000000000..000649228a6 --- /dev/null +++ b/llvm/lib/Analysis/ProfileDataLoader.cpp @@ -0,0 +1,186 @@ +//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// +// +//                      The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileDataLoader class is used to load raw profiling data from the dump +// file. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Analysis/ProfileDataLoader.h"
+#include "llvm/Analysis/ProfileDataTypes.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+namespace llvm {
+
+template<>
+char ProfileDataT<Function,BasicBlock>::ID = 0;
+
+/// Print the name of function 'F'.  'F' must not be null.
+raw_ostream& operator<<(raw_ostream &O, const Function *F) {
+  return O << F->getName();
+}
+
+/// Print the name of basic block 'BB'.  'BB' must not be null.
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
+  return O << BB->getName();
+}
+
+/// Print edge 'E' as "(source,sink)"; a null block is printed as "0".
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
+  O << "(";
+
+  if (E.first)
+    O << E.first;
+  else
+    O << "0";
+
+  O << ",";
+
+  if (E.second)
+    O << E.second;
+  else
+    O << "0";
+
+  return O << ")";
+}
+
+} // namespace llvm
+
+/// ByteSwap - Byteswap 'Var' if 'Really' is true.  Required when the compiler
+/// host and target have different endianness.
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+  if (!Really) return Var;
+  return ((Var & (255U<< 0U)) << 24U) |
+         ((Var & (255U<< 8U)) <<  8U) |
+         ((Var & (255U<<16U)) >>  8U) |
+         ((Var & (255U<<24U)) >> 24U);
+}
+
+/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
+/// (or both) may not be defined.
+static unsigned AddCounts(unsigned A, unsigned B) {
+  // If either value is undefined, use the other.
+  // Undefined + undefined = undefined.
+  if (A == ProfileDataLoader::Uncounted) return B;
+  if (B == ProfileDataLoader::Uncounted) return A;
+
+  // Saturate to the maximum storable value.  This could change taken/nottaken
+  // ratios, but is presumably better than wrapping and thus potentially
+  // inverting ratios.
+  unsigned long long tmp = (unsigned long long)A + (unsigned long long)B;
+  if (tmp > (unsigned long long)ProfileDataLoader::MaxCount)
+    tmp = ProfileDataLoader::MaxCount;
+  return (unsigned)tmp;
+}
+
+/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' into
+/// the front of 'Data', which must already hold at least 'NumEntries'
+/// elements.  Prints an error and exits the program if the file is too short.
+template <typename T>
+static void ReadProfilingData(const char *ToolName, FILE *F,
+                              std::vector<T> &Data, size_t NumEntries) {
+  // An empty packet carries no payload.  Return before forming &Data[0],
+  // which is undefined behavior on an empty vector.
+  if (NumEntries == 0) return;
+
+  // Read in the block of data...
+  if (fread(&Data[0], sizeof(T), NumEntries, F) != NumEntries) {
+    errs() << ToolName << ": profiling data truncated!\n";
+    perror(0);
+    exit(1);
+  }
+}
+
+/// ReadProfilingNumEntries - Read how many entries are in this profiling data
+/// packet.
+static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
+                                        bool ShouldByteSwap) {
+  std::vector<unsigned> NumEntries(1);
+  ReadProfilingData<unsigned>(ToolName, F, NumEntries, 1);
+  return ByteSwap(NumEntries[0], ShouldByteSwap);
+}
+
+/// ReadProfilingBlock - Read the number of entries in the next profiling data
+/// packet and then accumulate the entries into 'Data'.
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+                               bool ShouldByteSwap,
+                               std::vector<unsigned> &Data) {
+  // Read the number of entries...
+  unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
+
+  // Read in the data.
+  std::vector<unsigned> TempSpace(NumEntries);
+  ReadProfilingData<unsigned>(ToolName, F, TempSpace, (size_t)NumEntries);
+
+  // Make sure we have enough space ...
+  if (Data.size() < NumEntries)
+    Data.resize(NumEntries, ProfileDataLoader::Uncounted);
+
+  // Accumulate the data we just read into the existing data.
+  for (unsigned i = 0; i < NumEntries; ++i) {
+    Data[i] = AddCounts(ByteSwap(TempSpace[i], ShouldByteSwap), Data[i]);
+  }
+}
+
+/// ReadProfilingArgBlock - Read the command line arguments that the program
+/// was run with when the current profiling data packet(s) were generated.
+static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
+                                  bool ShouldByteSwap,
+                                  std::vector<std::string> &CommandLines) {
+  // Read the number of bytes ...
+  unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
+
+  // Read in the arguments (if there are any to read).  Round up the length to
+  // the nearest 4-byte multiple.
+  std::vector<char> Args(ArgLength+4);
+  if (ArgLength)
+    ReadProfilingData<char>(ToolName, F, Args, (ArgLength+3) & ~3);
+
+  // Store the arguments.
+  CommandLines.push_back(std::string(&Args[0], &Args[ArgLength]));
+}
+
+const unsigned ProfileDataLoader::Uncounted = ~0U;
+const unsigned ProfileDataLoader::MaxCount = ~0U - 1U;
+
+/// ProfileDataLoader ctor - Read the specified profiling data file, exiting
+/// the program if the file is invalid or broken.
+ProfileDataLoader::ProfileDataLoader(const char *ToolName,
+                                     const std::string &Filename)
+  : Filename(Filename) {
+  FILE *F = fopen(Filename.c_str(), "rb");
+  if (F == 0) {
+    errs() << ToolName << ": Error opening '" << Filename << "': ";
+    perror(0);
+    exit(1);
+  }
+
+  // Keep reading packets until we run out of them.
+  unsigned PacketType;
+  while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
+    // If the low eight bits of the packet are zero, we must be dealing with an
+    // endianness mismatch.  Byteswap all words read from the profiling
+    // information.  This can happen when the compiler host and target have
+    // different endianness.
+    bool ShouldByteSwap = (char)PacketType == 0;
+    PacketType = ByteSwap(PacketType, ShouldByteSwap);
+
+    switch (PacketType) {
+      case ArgumentInfo:
+        ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines);
+        break;
+
+      case EdgeInfo:
+        ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
+        break;
+
+      default:
+        errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
+        exit(1);
+    }
+  }
+
+  fclose(F);
+}
diff --git a/llvm/lib/Analysis/ProfileDataLoaderPass.cpp b/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
new file mode 100644
index 00000000000..2a61a0b6ed6
--- /dev/null
+++ b/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
@@ -0,0 +1,188 @@
+//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass loads profiling data from a dump file and sets branch weight
+// metadata.
+//
+// TODO: Replace all "profile-metadata-loader" strings with "profile-loader"
+// once ProfileInfo etc. has been removed.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-metadata-loader"
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileDataLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated.");
+
+// Command line option naming the profile dump to load; defaults to
+// "llvmprof.out".
+static cl::opt<std::string>
+ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"),
+                  cl::value_desc("filename"),
+                  cl::desc("Profile file loaded by -profile-metadata-loader"));
+
+namespace {
+  /// This pass loads profiling data from a dump file and sets branch weight
+  /// metadata.
+  class ProfileMetadataLoaderPass : public ModulePass {
+    // Name of the profile dump file that runOnModule() loads.
+    std::string Filename;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+
+    /// Construct the pass.  An empty 'filename' selects the value of the
+    /// -profile-file command line option instead.
+    explicit ProfileMetadataLoaderPass(const std::string &filename = "")
+        : ModulePass(ID), Filename(filename) {
+      initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
+      if (filename.empty()) Filename = ProfileMetadataFilename;
+    }
+
+    /// Declare that this pass preserves all analyses.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profile loader";
+    }
+
+    // Helpers used by runOnModule(); each is documented at its definition
+    // later in this file.
+    virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
+                          std::vector<unsigned>&);
+    virtual unsigned matchEdges(Module&, ProfileData&, std::vector<unsigned>&);
+    virtual void setBranchWeightMetadata(Module&, ProfileData&);
+
+    virtual bool runOnModule(Module &M);
+  };
+}  // End of anonymous namespace
+
+char ProfileMetadataLoaderPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
+              "Load profile information from llvmprof.out", false, true)
+INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
+              "Load profile information from llvmprof.out", false, true)
+
+char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
+
+/// createProfileMetadataLoaderPass - This function returns a Pass that loads
+/// the profiling information for the module from the specified filename,
+/// making it available to the optimizers.  The overload without arguments
+/// uses the file named by the -profile-file option.
+ModulePass *llvm::createProfileMetadataLoaderPass() {
+    return new ProfileMetadataLoaderPass();
+}
+ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
+  return new ProfileMetadataLoaderPass(Filename);
+}
+
+/// readEdge - Take the value from a profile counter and assign it to an edge.
+void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
+                                         ProfileData &PB, ProfileData::Edge e,
+                                         std::vector<unsigned> &Counters) {
+  // Edges beyond the end of the counter list have no recorded value.
+  if (ReadCount >= Counters.size())
+    return;
+
+  const unsigned Count = Counters[ReadCount];
+  assert(Count != ProfileDataLoader::Uncounted);
+  PB.addEdgeWeight(e, Count);
+
+  DEBUG(dbgs() << "-- Read Edge Counter for " << e
+               << " (# "<< (ReadCount) << "): "
+               << PB.getEdgeWeight(e) << "\n");
+}
+
+/// matchEdges - Walk the module and pair each profile counter, in order, with
+/// an edge: first the (null, entry block) edge of every defined function,
+/// then one edge per terminator successor of each of its blocks.  Returns the
+/// number of edges visited, which is zero when 'Counters' is empty.
+unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
+                                             std::vector<unsigned> &Counters) {
+  if (Counters.empty())
+    return 0;
+
+  unsigned NextCounter = 0;
+
+  for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
+    // Declarations have no body and therefore no edges.
+    if (FI->isDeclaration())
+      continue;
+
+    DEBUG(dbgs() << "Loading edges in '" << FI->getName() << "'\n");
+
+    // The function entry counter is modelled as an edge from the null block.
+    readEdge(NextCounter, PB, PB.getEdge(0, &FI->getEntryBlock()), Counters);
+    ++NextCounter;
+
+    for (Function::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+      TerminatorInst *Term = BI->getTerminator();
+      const unsigned NumSucc = Term->getNumSuccessors();
+      for (unsigned Idx = 0; Idx != NumSucc; ++Idx) {
+        readEdge(NextCounter, PB, PB.getEdge(BI, Term->getSuccessor(Idx)),
+                 Counters);
+        ++NextCounter;
+      }
+    }
+  }
+
+  return NextCounter;
+}
+
+/// setBranchWeightMetadata - Translate the counter values associated with each
+/// edge into branch weights for each conditional branch (a branch with 2 or
+/// more destinations).
+void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
+                                                        ProfileData &PB) {
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+      unsigned NumSuccessors = TI->getNumSuccessors();
+
+      // If there is only one successor then we can not set a branch
+      // probability as the target is certain.
+      if (NumSuccessors < 2) continue;
+
+      // Load the weights of all edges leading from this terminator.  Every
+      // such edge must already have a weight stored in 'PB'.
+      DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
+                   << " successors:\n");
+      std::vector<uint32_t> Weights(NumSuccessors);
+      for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
+          ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
+          Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
+          DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
+                       << Weights[s] << "\n");
+      }
+
+      // Set branch weight metadata.  This will set branch probabilities of
+      // 100%/0% if that is true of the dynamic execution.
+      // BranchProbabilityInfo can account for this when it loads this metadata
+      // (it gives the unexecuted branch a weight of 1 for the purposes of
+      // probability calculations).
+      MDBuilder MDB(TI->getContext());
+      MDNode *Node = MDB.createBranchWeights(Weights);
+      TI->setMetadata(LLVMContext::MD_prof, Node);
+      NumTermsAnnotated++;
+    }
+  }
+}
+
+/// runOnModule - Load the profile dump, match its counters to the edges of
+/// 'M' and attach branch weight metadata.  Returns true only if metadata may
+/// have been attached.
+bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
+  ProfileDataLoader PDL("profile-data-loader", Filename);
+  ProfileData PB;
+
+  std::vector<unsigned> Counters = PDL.getRawEdgeCounts();
+
+  unsigned ReadCount = matchEdges(M, PB, Counters);
+
+  if (ReadCount != Counters.size()) {
+    errs() << "WARNING: profile information is inconsistent with "
+           << "the current program!\n";
+  }
+  NumEdgesRead = ReadCount;
+
+  // matchEdges only stores a weight for edges that have a counter.  With an
+  // empty profile, or one with fewer counters than the module has edges, some
+  // edges have no weight and looking them up would fail, so leave the module
+  // untouched in those cases.
+  if (Counters.empty() || ReadCount > Counters.size())
+    return false;
+
+  setBranchWeightMetadata(M, PB);
+
+  return ReadCount > 0;
+}
diff --git a/llvm/lib/Analysis/ProfileInfo.cpp b/llvm/lib/Analysis/ProfileInfo.cpp
index 173de2c0279..b5b7ac1e501 100644
--- a/llvm/lib/Analysis/ProfileInfo.cpp
+++ b/llvm/lib/Analysis/ProfileInfo.cpp
@@ -1016,40 +1016,14 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
   }
 }
 
-raw_ostream& operator<<(raw_ostream &O, const Function *F) {
-  return O << F->getName();
-}
-
 raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
   return O << MF->getFunction()->getName() << "(MF)";
 }
 
-raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
-  return O << BB->getName();
-}
-
 raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
   return O << MBB->getBasicBlock()->getName() << "(MB)";
 }
 
-raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
-  O << "(";
-
-  if (E.first)
-    O << E.first;
-  else
-    O << "0";
-
-  O << ",";
-
-  if (E.second)
-    O << E.second;
-  else
-    O << "0";
-
-  return O << ")";
-}
-
 raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
   O << "(";
 
diff --git a/llvm/test/Analysis/Profiling/load-branch-weights-ifs.ll b/llvm/test/Analysis/Profiling/load-branch-weights-ifs.ll
new file mode 100644
index 00000000000..b5ee2f34660 --- /dev/null +++ b/llvm/test/Analysis/Profiling/load-branch-weights-ifs.ll @@ -0,0 +1,119 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN:     -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN:     | FileCheck %s +; RUN: rm -f %t1.prof_data + +;; func_mod - Branch taken 6 times in 7. +define i32 @func_mod(i32 %N) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %N.addr = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  %0 = load i32* %N.addr, align 4 +  %rem = srem i32 %0, 7 +  %tobool = icmp ne i32 %rem, 0 +  br i1 %tobool, label %if.then, label %if.else +; CHECK: br i1 %tobool, label %if.then, label %if.else, !prof !0 + +if.then: +  store i32 1, i32* %retval +  br label %return + +if.else: +  store i32 0, i32* %retval +  br label %return + +return: +  %1 = load i32* %retval +  ret i32 %1 +} + +;; func_const_true - conditional branch with a 100% taken probability. +define i32 @func_const_true(i32 %N) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %N.addr = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  %0 = load i32* %N.addr, align 4 +  %cmp = icmp eq i32 %0, 1 +  br i1 %cmp, label %if.then, label %if.end +; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: +  store i32 1, i32* %retval +  br label %return + +if.end: +  store i32 0, i32* %retval +  br label %return + +return: +  %1 = load i32* %retval +  ret i32 %1 +} + +;; func_const_false - conditional branch with a 100% not-taken probability. 
+define i32 @func_const_false(i32 %N) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %N.addr = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  %0 = load i32* %N.addr, align 4 +  %cmp = icmp eq i32 %0, 1 +  br i1 %cmp, label %if.then, label %if.end +; CHECK: br i1 %cmp, label %if.then, label %if.end, !prof !2 + +if.then: +  store i32 1, i32* %retval +  br label %return + +if.end: +  store i32 0, i32* %retval +  br label %return + +return: +  %1 = load i32* %retval +  ret i32 %1 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %argc.addr = alloca i32, align 4 +  %argv.addr = alloca i8**, align 8 +  %loop = alloca i32, align 4 +  store i32 0, i32* %retval +  store i32 0, i32* %loop, align 4 +  br label %for.cond + +for.cond: +  %0 = load i32* %loop, align 4 +  %cmp = icmp slt i32 %0, 7000 +  br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !3 + +for.body: +  %1 = load i32* %loop, align 4 +  %call = call i32 @func_mod(i32 %1) +  br label %for.inc + +for.inc: +  %2 = load i32* %loop, align 4 +  %inc = add nsw i32 %2, 1 +  store i32 %inc, i32* %loop, align 4 +  br label %for.cond + +for.end: +  %call1 = call i32 @func_const_true(i32 1) +  %call2 = call i32 @func_const_false(i32 0) +  ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 6000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 0} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7000, i32 1} +; CHECK-NOT: !4 diff --git a/llvm/test/Analysis/Profiling/load-branch-weights-loops.ll b/llvm/test/Analysis/Profiling/load-branch-weights-loops.ll new file mode 100644 index 00000000000..26c89b7c9bf --- /dev/null +++ b/llvm/test/Analysis/Profiling/load-branch-weights-loops.ll @@ -0,0 +1,185 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f 
%t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN:     -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN:     | FileCheck %s +; RUN: rm -f %t1.prof_data + +;; func_for - Test branch probabilities for a vanilla for loop. +define i32 @func_for(i32 %N) nounwind uwtable { +entry: +  %N.addr = alloca i32, align 4 +  %ret = alloca i32, align 4 +  %loop = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  store i32 0, i32* %ret, align 4 +  store i32 0, i32* %loop, align 4 +  br label %for.cond + +for.cond: +  %0 = load i32* %loop, align 4 +  %1 = load i32* %N.addr, align 4 +  %cmp = icmp slt i32 %0, %1 +  br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !0 + +for.body: +  %2 = load i32* %N.addr, align 4 +  %3 = load i32* %ret, align 4 +  %add = add nsw i32 %3, %2 +  store i32 %add, i32* %ret, align 4 +  br label %for.inc + +for.inc: +  %4 = load i32* %loop, align 4 +  %inc = add nsw i32 %4, 1 +  store i32 %inc, i32* %loop, align 4 +  br label %for.cond + +for.end: +  %5 = load i32* %ret, align 4 +  ret i32 %5 +} + +;; func_for_odd - Test branch probabilities for a for loop with a continue and +;; a break. 
+define i32 @func_for_odd(i32 %N) nounwind uwtable { +entry: +  %N.addr = alloca i32, align 4 +  %ret = alloca i32, align 4 +  %loop = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  store i32 0, i32* %ret, align 4 +  store i32 0, i32* %loop, align 4 +  br label %for.cond + +for.cond: +  %0 = load i32* %loop, align 4 +  %1 = load i32* %N.addr, align 4 +  %cmp = icmp slt i32 %0, %1 +  br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, label %for.end, !prof !1 + +for.body: +  %2 = load i32* %loop, align 4 +  %rem = srem i32 %2, 10 +  %tobool = icmp ne i32 %rem, 0 +  br i1 %tobool, label %if.then, label %if.end +; CHECK: br i1 %tobool, label %if.then, label %if.end, !prof !2 + +if.then: +  br label %for.inc + +if.end: +  %3 = load i32* %loop, align 4 +  %cmp1 = icmp eq i32 %3, 500 +  br i1 %cmp1, label %if.then2, label %if.end3 +; CHECK: br i1 %cmp1, label %if.then2, label %if.end3, !prof !3 + +if.then2: +  br label %for.end + +if.end3: +  %4 = load i32* %N.addr, align 4 +  %5 = load i32* %ret, align 4 +  %add = add nsw i32 %5, %4 +  store i32 %add, i32* %ret, align 4 +  br label %for.inc + +for.inc: +  %6 = load i32* %loop, align 4 +  %inc = add nsw i32 %6, 1 +  store i32 %inc, i32* %loop, align 4 +  br label %for.cond + +for.end: +  %7 = load i32* %ret, align 4 +  ret i32 %7 +} + +;; func_while - Test branch probability in a vanilla while loop. 
+define i32 @func_while(i32 %N) nounwind uwtable { +entry: +  %N.addr = alloca i32, align 4 +  %ret = alloca i32, align 4 +  %loop = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  store i32 0, i32* %ret, align 4 +  store i32 0, i32* %loop, align 4 +  br label %while.cond + +while.cond: +  %0 = load i32* %loop, align 4 +  %1 = load i32* %N.addr, align 4 +  %cmp = icmp slt i32 %0, %1 +  br i1 %cmp, label %while.body, label %while.end +; CHECK: br i1 %cmp, label %while.body, label %while.end, !prof !0 + +while.body: +  %2 = load i32* %N.addr, align 4 +  %3 = load i32* %ret, align 4 +  %add = add nsw i32 %3, %2 +  store i32 %add, i32* %ret, align 4 +  %4 = load i32* %loop, align 4 +  %inc = add nsw i32 %4, 1 +  store i32 %inc, i32* %loop, align 4 +  br label %while.cond + +while.end: +  %5 = load i32* %ret, align 4 +  ret i32 %5 +} + +;; func_do_while - Test branch probability in a vanilla do-while loop. +define i32 @func_do_while(i32 %N) nounwind uwtable { +entry: +  %N.addr = alloca i32, align 4 +  %ret = alloca i32, align 4 +  %loop = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  store i32 0, i32* %ret, align 4 +  store i32 0, i32* %loop, align 4 +  br label %do.body + +do.body: +  %0 = load i32* %N.addr, align 4 +  %1 = load i32* %ret, align 4 +  %add = add nsw i32 %1, %0 +  store i32 %add, i32* %ret, align 4 +  %2 = load i32* %loop, align 4 +  %inc = add nsw i32 %2, 1 +  store i32 %inc, i32* %loop, align 4 +  br label %do.cond + +do.cond: +  %3 = load i32* %loop, align 4 +  %4 = load i32* %N.addr, align 4 +  %cmp = icmp slt i32 %3, %4 +  br i1 %cmp, label %do.body, label %do.end +; CHECK: br i1 %cmp, label %do.body, label %do.end, !prof !4 + +do.end: +  %5 = load i32* %ret, align 4 +  ret i32 %5 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %argc.addr = alloca i32, align 4 +  %argv.addr = alloca i8**, align 8 +  store i32 0, i32* %retval +  %call = call i32 @func_for(i32 
1000) +  %call1 = call i32 @func_for_odd(i32 1000) +  %call2 = call i32 @func_while(i32 1000) +  %call3 = call i32 @func_do_while(i32 1000) +  ret i32 0 +} + +!0 = metadata !{metadata !"branch_weights", i32 1000, i32 1} +!1 = metadata !{metadata !"branch_weights", i32 501, i32 0} +!2 = metadata !{metadata !"branch_weights", i32 450, i32 51} +!3 = metadata !{metadata !"branch_weights", i32 1, i32 50} +!4 = metadata !{metadata !"branch_weights", i32 999, i32 1} +; CHECK-NOT: !5 diff --git a/llvm/test/Analysis/Profiling/load-branch-weights-switches.ll b/llvm/test/Analysis/Profiling/load-branch-weights-switches.ll new file mode 100644 index 00000000000..9efe6d0fe4e --- /dev/null +++ b/llvm/test/Analysis/Profiling/load-branch-weights-switches.ll @@ -0,0 +1,162 @@ +; RUN: opt -insert-edge-profiling -o %t1 < %s +; RUN: rm -f %t1.prof_data +; RUN: lli -load %llvmshlibdir/libprofile_rt%shlibext %t1 \ +; RUN:     -llvmprof-output %t1.prof_data +; RUN: opt -profile-file %t1.prof_data -profile-metadata-loader -S -o - < %s \ +; RUN:     | FileCheck %s +; RUN: rm -f %t1.prof_data + +;; func_switch - Test branch probabilities for a switch instruction with an +;; even chance of taking each case (or no case). +define i32 @func_switch(i32 %N) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %N.addr = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  %0 = load i32* %N.addr, align 4 +  %rem = srem i32 %0, 4 +  switch i32 %rem, label %sw.epilog [ +    i32 0, label %sw.bb +    i32 1, label %sw.bb1 +    i32 2, label %sw.bb2 +  ] +; CHECK: ], !prof !0 + +sw.bb: +  store i32 5, i32* %retval +  br label %return + +sw.bb1: +  store i32 6, i32* %retval +  br label %return + +sw.bb2: +  store i32 7, i32* %retval +  br label %return + +sw.epilog: +  store i32 8, i32* %retval +  br label %return + +return: +  %1 = load i32* %retval +  ret i32 %1 +} + +;; func_switch_switch - Test branch probabilities in a switch-instruction that +;; leads to further switch instructions. 
 The first-tier switch occludes some +;; possibilities in the second-tier switches, leading to some branches having a +;; 0 probability. +define i32 @func_switch_switch(i32 %N) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %N.addr = alloca i32, align 4 +  store i32 %N, i32* %N.addr, align 4 +  %0 = load i32* %N.addr, align 4 +  %rem = srem i32 %0, 2 +  switch i32 %rem, label %sw.default11 [ +    i32 0, label %sw.bb +    i32 1, label %sw.bb5 +  ] +; CHECK: ], !prof !1 + +sw.bb: +  %1 = load i32* %N.addr, align 4 +  %rem1 = srem i32 %1, 4 +  switch i32 %rem1, label %sw.default [ +    i32 0, label %sw.bb2 +    i32 1, label %sw.bb3 +    i32 2, label %sw.bb4 +  ] +; CHECK: ], !prof !2 + +sw.bb2: +  store i32 5, i32* %retval +  br label %return + +sw.bb3: +  store i32 6, i32* %retval +  br label %return + +sw.bb4: +  store i32 7, i32* %retval +  br label %return + +sw.default: +  store i32 8, i32* %retval +  br label %return + +sw.bb5: +  %2 = load i32* %N.addr, align 4 +  %rem6 = srem i32 %2, 4 +  switch i32 %rem6, label %sw.default10 [ +    i32 0, label %sw.bb7 +    i32 1, label %sw.bb8 +    i32 2, label %sw.bb9 +  ] +; CHECK: ], !prof !3 + +sw.bb7: +  store i32 9, i32* %retval +  br label %return + +sw.bb8: +  store i32 10, i32* %retval +  br label %return + +sw.bb9: +  store i32 11, i32* %retval +  br label %return + +sw.default10: +  store i32 12, i32* %retval +  br label %return + +sw.default11: +  store i32 13, i32* %retval +  br label %return + +return: +  %3 = load i32* %retval +  ret i32 %3 +} + +define i32 @main(i32 %argc, i8** %argv) nounwind uwtable { +entry: +  %retval = alloca i32, align 4 +  %argc.addr = alloca i32, align 4 +  %argv.addr = alloca i8**, align 8 +  %loop = alloca i32, align 4 +  store i32 0, i32* %retval +  store i32 0, i32* %loop, align 4 +  br label %for.cond + +for.cond: +  %0 = load i32* %loop, align 4 +  %cmp = icmp slt i32 %0, 4000 +  br i1 %cmp, label %for.body, label %for.end +; CHECK: br i1 %cmp, label %for.body, 
label %for.end, !prof !4 + +for.body: +  %1 = load i32* %loop, align 4 +  %call = call i32 @func_switch(i32 %1) +  %2 = load i32* %loop, align 4 +  %call1 = call i32 @func_switch_switch(i32 %2) +  br label %for.inc + +for.inc: +  %3 = load i32* %loop, align 4 +  %inc = add nsw i32 %3, 1 +  store i32 %inc, i32* %loop, align 4 +  br label %for.cond + +for.end: +  ret i32 0 +} + +; CHECK: !0 = metadata !{metadata !"branch_weights", i32 1000, i32 1000, i32 1000, i32 1000} +; CHECK: !1 = metadata !{metadata !"branch_weights", i32 0, i32 2000, i32 2000} +; CHECK: !2 = metadata !{metadata !"branch_weights", i32 0, i32 1000, i32 0, i32 1000} +; CHECK: !3 = metadata !{metadata !"branch_weights", i32 1000, i32 0, i32 1000, i32 0} +; CHECK: !4 = metadata !{metadata !"branch_weights", i32 4000, i32 1} +; CHECK-NOT: !5  | 

