summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
diff options
context:
space:
mode:
authorHiroshi Yamauchi <yamauchi@google.com>2018-09-04 17:19:13 +0000
committerHiroshi Yamauchi <yamauchi@google.com>2018-09-04 17:19:13 +0000
commit9775a620b0f6f5d3379eead8fdcfa3465782caf5 (patch)
tree1e7707069bcc7751811291f5c3448c90d15e8ed5 /llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
parent24568789c4160182c44fa382138a19943f244083 (diff)
downloadbcm5719-llvm-9775a620b0f6f5d3379eead8fdcfa3465782caf5.tar.gz
bcm5719-llvm-9775a620b0f6f5d3379eead8fdcfa3465782caf5.zip
[PGO] Control Height Reduction
Summary: Control height reduction merges conditional blocks of code and reduces the number of conditional branches in the hot path based on profiles. if (hot_cond1) { // Likely true. do_stg_hot1(); } if (hot_cond2) { // Likely true. do_stg_hot2(); } -> if (hot_cond1 && hot_cond2) { // Hot path. do_stg_hot1(); do_stg_hot2(); } else { // Cold path. if (hot_cond1) { do_stg_hot1(); } if (hot_cond2) { do_stg_hot2(); } } This speeds up some internal benchmarks up to ~30%. Reviewers: davidxl Reviewed By: davidxl Subscribers: xbolva00, dmgreen, mehdi_amini, llvm-commits, mgorny Differential Revision: https://reviews.llvm.org/D50591 llvm-svn: 341386
Diffstat (limited to 'llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp')
-rw-r--r--llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp2010
1 files changed, 2010 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
new file mode 100644
index 00000000000..4d925c89ef1
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -0,0 +1,2010 @@
+//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass merges conditional blocks of code and reduces the number of
+// conditional branches in the hot paths based on profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include <cxxabi.h>
+#include <set>
+#include <sstream>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "chr"
+
+#define CHR_DEBUG(X) LLVM_DEBUG(X)
+
+static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
+ cl::desc("Apply CHR for all functions"));
+
+static cl::opt<double> CHRBiasThreshold(
+ "chr-bias-threshold", cl::init(0.99), cl::Hidden,
+ cl::desc("CHR considers a branch bias greater than this ratio as biased"));
+
+static cl::opt<unsigned> CHRMergeThreshold(
+ "chr-merge-threshold", cl::init(2), cl::Hidden,
+ cl::desc("CHR merges a group of N branches/selects where N >= this value"));
+
+static cl::opt<std::string> CHRModuleList(
+ "chr-module-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
+
+static cl::opt<std::string> CHRFunctionList(
+ "chr-function-list", cl::init(""), cl::Hidden,
+ cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
+
+static StringSet<> CHRModules;
+static StringSet<> CHRFunctions;
+
+static void ParseCHRFilterFiles() {
+ if (!CHRModuleList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRModules.insert(Line);
+ }
+ }
+ if (!CHRFunctionList.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
+ if (!FileOrErr) {
+ errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
+ std::exit(1);
+ }
+ StringRef Buf = FileOrErr->get()->getBuffer();
+ SmallVector<StringRef, 0> Lines;
+ Buf.split(Lines, '\n');
+ for (StringRef Line : Lines) {
+ Line = Line.trim();
+ if (!Line.empty())
+ CHRFunctions.insert(Line);
+ }
+ }
+}
+
+namespace {
+class ControlHeightReductionLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ ControlHeightReductionLegacyPass() : FunctionPass(ID) {
+ initializeControlHeightReductionLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ ParseCHRFilterFiles();
+ }
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<RegionInfoPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+} // end anonymous namespace
+
+char ControlHeightReductionLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
+INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
+ "chr",
+ "Reduce control height in the hot paths",
+ false, false)
+
+FunctionPass *llvm::createControlHeightReductionLegacyPass() {
+ return new ControlHeightReductionLegacyPass();
+}
+
+namespace {
+
+struct CHRStats {
+ CHRStats() : NumBranches(0), NumBranchesDelta(0),
+ WeightedNumBranchesDelta(0) {}
+ void print(raw_ostream &OS) const {
+ OS << "CHRStats: NumBranches " << NumBranches
+ << " NumBranchesDelta " << NumBranchesDelta
+ << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
+ }
+ uint64_t NumBranches; // The original number of conditional branches /
+ // selects
+ uint64_t NumBranchesDelta; // The decrease of the number of conditional
+ // branches / selects in the hot paths due to CHR.
+ uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
+ // count at the scope entry.
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) {
+ Stats.print(OS);
+ return OS;
+}
+
+// RegInfo - some properties of a Region.
+struct RegInfo {
+ RegInfo() : R(nullptr), HasBranch(false) {}
+ RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
+ Region *R;
+ bool HasBranch;
+ SmallVector<SelectInst *, 8> Selects;
+};
+
+typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
+
+// CHRScope - a sequence of regions to CHR together. It corresponds to a
+// sequence of conditional blocks. It can have subscopes which correspond to
+// nested conditional blocks. Nested CHRScopes form a tree.
+class CHRScope {
+ public:
+ CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
+ assert(RI.R && "Null RegionIn");
+ RegInfos.push_back(RI);
+ }
+
+ Region *getParentRegion() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ Region *Parent = RegInfos[0].R->getParent();
+ assert(Parent && "Unexpected to call this on the top-level region");
+ return Parent;
+ }
+
+ BasicBlock *getEntryBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.front().R->getEntry();
+ }
+
+ BasicBlock *getExitBlock() {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ return RegInfos.back().R->getExit();
+ }
+
+ bool appendable(CHRScope *Next) {
+ // The next scope is appendable only if this scope is directly connected to
+ // it (which implies it post-dominates this scope) and this scope dominates
+ // it (no edge to the next scope outside this scope).
+ BasicBlock *NextEntry = Next->getEntryBlock();
+ if (getExitBlock() != NextEntry)
+ // Not directly connected.
+ return false;
+ Region *LastRegion = RegInfos.back().R;
+ for (BasicBlock *Pred : predecessors(NextEntry))
+ if (!LastRegion->contains(Pred))
+ // There's an edge going into the entry of the next scope from outside
+ // of this scope.
+ return false;
+ return true;
+ }
+
+ void append(CHRScope *Next) {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
+ assert(getParentRegion() == Next->getParentRegion() &&
+ "Must be siblings");
+ assert(getExitBlock() == Next->getEntryBlock() &&
+ "Must be adjacent");
+ for (RegInfo &RI : Next->RegInfos)
+ RegInfos.push_back(RI);
+ for (CHRScope *Sub : Next->Subs)
+ Subs.push_back(Sub);
+ }
+
+ void addSub(CHRScope *SubIn) {
+#ifndef NDEBUG
+ bool is_child = false;
+ for (RegInfo &RI : RegInfos)
+ if (RI.R == SubIn->getParentRegion()) {
+ is_child = true;
+ break;
+ }
+ assert(is_child && "Must be a child");
+#endif
+ Subs.push_back(SubIn);
+ }
+
+ // Split this scope at the boundary region into two, which will belong to the
+ // tail and returns the tail.
+ CHRScope *split(Region *Boundary) {
+ assert(Boundary && "Boundary null");
+ assert(RegInfos.begin()->R != Boundary &&
+ "Can't be split at beginning");
+ auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Boundary](const RegInfo& RI) {
+ return Boundary == RI.R;
+ });
+ if (BoundaryIt == RegInfos.end())
+ return nullptr;
+ SmallVector<RegInfo, 8> TailRegInfos;
+ SmallVector<CHRScope *, 8> TailSubs;
+ TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
+ RegInfos.resize(BoundaryIt - RegInfos.begin());
+ DenseSet<Region *> TailRegionSet;
+ for (RegInfo &RI : TailRegInfos)
+ TailRegionSet.insert(RI.R);
+ for (auto It = Subs.begin(); It != Subs.end(); ) {
+ CHRScope *Sub = *It;
+ assert(Sub && "null Sub");
+ Region *Parent = Sub->getParentRegion();
+ if (TailRegionSet.count(Parent)) {
+ TailSubs.push_back(Sub);
+ It = Subs.erase(It);
+ } else {
+ assert(std::find_if(RegInfos.begin(), RegInfos.end(),
+ [&Parent](const RegInfo& RI) {
+ return Parent == RI.R;
+ }) != RegInfos.end() &&
+ "Must be in head");
+ ++It;
+ }
+ }
+ assert(HoistStopMap.empty() && "MapHoistStops must be empty");
+ return new CHRScope(TailRegInfos, TailSubs);
+ }
+
+ bool contains(Instruction *I) const {
+ BasicBlock *Parent = I->getParent();
+ for (const RegInfo &RI : RegInfos)
+ if (RI.R->contains(Parent))
+ return true;
+ return false;
+ }
+
+ void print(raw_ostream &OS) const;
+
+ SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
+ SmallVector<CHRScope *, 8> Subs; // Subscopes.
+
+ // The instruction at which to insert the CHR conditional branch (and hoist
+ // the dependent condition values).
+ Instruction *BranchInsertPoint;
+
+ // True-biased and false-biased regions (conditional blocks),
+ // respectively. Used only for the outermost scope and includes regions in
+ // subscopes. The rest are unbiased.
+ DenseSet<Region *> TrueBiasedRegions;
+ DenseSet<Region *> FalseBiasedRegions;
+ // Among the biased regions, the regions that get CHRed.
+ SmallVector<RegInfo, 8> CHRRegions;
+
+ // True-biased and false-biased selects, respectively. Used only for the
+ // outermost scope and includes ones in subscopes.
+ DenseSet<SelectInst *> TrueBiasedSelects;
+ DenseSet<SelectInst *> FalseBiasedSelects;
+
+ // Map from one of the above regions to the instructions to stop
+ // hoisting instructions at through use-def chains.
+ HoistStopMapTy HoistStopMap;
+
+ private:
+ CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
+ SmallVector<CHRScope *, 8> &SubsIn)
+ : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
+ Scope.print(OS);
+ return OS;
+}
+
+class CHR {
+ public:
+ CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
+ ProfileSummaryInfo &PSIin, RegionInfo &RIin)
+ : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
+
+ ~CHR() {
+ for (CHRScope *Scope : Scopes) {
+ delete Scope;
+ }
+ }
+
+ bool run();
+
+ private:
+ // See the comments in CHR::run() for the high level flow of the algorithm and
+ // what the following functions do.
+
+ void findScopes(SmallVectorImpl<CHRScope *> &Output) {
+ Region *R = RI.getTopLevelRegion();
+ CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
+ if (Scope) {
+ Output.push_back(Scope);
+ }
+ }
+ CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes);
+ CHRScope *findScope(Region *R);
+ void checkScopeHoistable(CHRScope *Scope);
+
+ void splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables);
+
+ void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
+ void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+ void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
+
+ void sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output);
+
+ void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
+ void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
+ void cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap);
+ BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap);
+ void fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount);
+ void fixupBranch(Region *R,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void fixupSelect(SelectInst* SI,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition, BranchProbability &CHRBranchBias);
+ void addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition);
+
+ Function &F;
+ BlockFrequencyInfo &BFI;
+ DominatorTree &DT;
+ ProfileSummaryInfo &PSI;
+ RegionInfo &RI;
+ CHRStats Stats;
+
+ // All the true-biased regions in the function
+ DenseSet<Region *> TrueBiasedRegionsGlobal;
+ // All the false-biased regions in the function
+ DenseSet<Region *> FalseBiasedRegionsGlobal;
+ // All the true-biased selects in the function
+ DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
+ // All the false-biased selects in the function
+ DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
+ // A map from biased regions to their branch bias
+ DenseMap<Region *, BranchProbability> BranchBiasMap;
+ // A map from biased selects to their branch bias
+ DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
+ // All the scopes.
+ DenseSet<CHRScope *> Scopes;
+};
+
+} // end anonymous namespace
+
+static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
+ if (ForceCHR)
+ return true;
+
+ if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
+ if (CHRModules.count(F.getParent()->getName()))
+ return true;
+ StringRef Name = F.getName();
+ if (CHRFunctions.count(Name))
+ return true;
+ const char* DemangledName = nullptr;
+ int Status = -1;
+ DemangledName = abi::__cxa_demangle(Name.str().c_str(),
+ nullptr, nullptr, &Status);
+ return DemangledName && CHRFunctions.count(DemangledName);
+ }
+
+ assert(PSI.hasProfileSummary() && "Empty PSI?");
+ return PSI.isFunctionEntryHot(&F);
+}
+
+static void dumpIR(Function &F, const char *Label, CHRStats *Stats) {
+ std::string Name = F.getName().str();
+ const char *DemangledName = nullptr;
+ int Status = -1;
+ DemangledName = abi::__cxa_demangle(Name.c_str(),
+ nullptr, nullptr, &Status);
+ if (DemangledName == nullptr) {
+ DemangledName = "<NOT-MANGLED>";
+ }
+ std::string ModuleName = F.getParent()->getName().str();
+ CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
+ << Name);
+ if (Stats)
+ CHR_DEBUG(dbgs() << " " << *Stats);
+ CHR_DEBUG(dbgs() << "\n");
+ CHR_DEBUG(F.dump());
+}
+
+
+void CHRScope::print(raw_ostream &OS) const {
+ assert(RegInfos.size() > 0 && "Empty CHRScope");
+ OS << "CHRScope[";
+ OS << RegInfos.size() << ", Regions[";
+ for (const RegInfo &RI : RegInfos) {
+ OS << RI.R->getNameStr();
+ if (RI.HasBranch)
+ OS << " B";
+ if (RI.Selects.size() > 0)
+ OS << " S" << RI.Selects.size();
+ OS << ", ";
+ }
+ if (RegInfos[0].R->getParent()) {
+ OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
+ } else {
+ // top level region
+ OS << "]";
+ }
+ OS << ", Subs[";
+ for (CHRScope *Sub : Subs) {
+ OS << *Sub << ", ";
+ }
+ OS << "]]";
+}
+
+// Return true if the given instruction type can be hoisted by CHR.
+static bool isHoistableInstructionType(Instruction *I) {
+ return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
+ isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
+ isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
+ isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
+ isa<InsertValueInst>(I);
+}
+
+// Return true if the given instruction can be hoisted by CHR.
+static bool isHoistable(Instruction *I, DominatorTree &DT) {
+ if (!isHoistableInstructionType(I))
+ return false;
+ return isSafeToSpeculativelyExecute(I, nullptr, &DT);
+}
+
+// Recursively traverse the use-def chains of the given value and return a set
+// of the unhoistable base values defined within the scope (excluding the
+// first-region entry block) or the (hoistable or unhoistable) base values that
+// are defined outside (including the first-region entry block) of the
+// scope. The returned set doesn't include constants.
+static std::set<Value *> getBaseValues(Value *V,
+ DominatorTree &DT) {
+ std::set<Value *> Result;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // We don't stop at a block that's not in the Scope because we would miss some
+ // instructions that are based on the same base values if we stop there.
+ if (!isHoistable(I, DT)) {
+ Result.insert(I);
+ return Result;
+ }
+ // I is hoistable above the Scope.
+ for (Value *Op : I->operands()) {
+ std::set<Value *> OpResult = getBaseValues(Op, DT);
+ Result.insert(OpResult.begin(), OpResult.end());
+ }
+ return Result;
+ }
+ if (isa<Argument>(V)) {
+ Result.insert(V);
+ return Result;
+ }
+ // We don't include others like constants because those won't lead to any
+ // chance of folding of conditions (eg two bit checks merged into one check)
+ // after CHR.
+ return Result; // empty
+}
+
+// Return true if V is already hoisted or can be hoisted (along with its
+// operands) above the insert point. When it returns true and HoistStops is
+// non-null, the instructions to stop hoisting at through the use-def chains are
+// inserted into HoistStops.
+static bool
+checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables,
+ DenseSet<Instruction *> *HoistStops) {
+ assert(InsertPoint && "Null InsertPoint");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
+ assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
+ if (Unhoistables.count(I)) {
+ // Don't hoist if they are not to be hoisted.
+ return false;
+ }
+ if (DT.dominates(I, InsertPoint)) {
+ // We are already above the insert point. Stop here.
+ if (HoistStops)
+ HoistStops->insert(I);
+ return true;
+ }
+ // We aren't not above the insert point, check if we can hoist it above the
+ // insert point.
+ if (isHoistable(I, DT)) {
+ // Check operands first.
+ DenseSet<Instruction *> OpsHoistStops;
+ bool AllOpsHoisted = true;
+ for (Value *Op : I->operands()) {
+ if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
+ AllOpsHoisted = false;
+ break;
+ }
+ }
+ if (AllOpsHoisted) {
+ CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
+ if (HoistStops)
+ HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
+ return true;
+ }
+ }
+ return false;
+ }
+ // Non-instructions are considered hoistable.
+ return true;
+}
+
+// Returns true and sets the true probability and false probability of an
+// MD_prof metadata if it's well-formed.
+static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
+ BranchProbability &FalseProb) {
+ if (!MD) return false;
+ MDString *MDName = cast<MDString>(MD->getOperand(0));
+ if (MDName->getString() != "branch_weights" ||
+ MD->getNumOperands() != 3)
+ return false;
+ ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
+ ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
+ if (!TrueWeight || !FalseWeight)
+ return false;
+ APInt TrueWt = TrueWeight->getValue();
+ APInt FalseWt = FalseWeight->getValue();
+ APInt SumWt = TrueWt + FalseWt;
+ TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
+ SumWt.getZExtValue());
+ FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
+ SumWt.getZExtValue());
+ return true;
+}
+
+static BranchProbability getCHRBiasThreshold() {
+ return BranchProbability::getBranchProbability(
+ static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
+}
+
+// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
+// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
+// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
+// false.
+template<typename K, typename S, typename M>
+bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
+ S &TrueSet, S &FalseSet, M &BiasMap) {
+ BranchProbability Threshold = getCHRBiasThreshold();
+ if (TrueProb >= Threshold) {
+ TrueSet.insert(Key);
+ BiasMap[Key] = TrueProb;
+ return true;
+ } else if (FalseProb >= Threshold) {
+ FalseSet.insert(Key);
+ BiasMap[Key] = FalseProb;
+ return true;
+ }
+ return false;
+}
+
+// Returns true and insert a region into the right biased set and the map if the
+// branch of the region is biased.
+static bool CheckBiasedBranch(BranchInst *BI, Region *R,
+ DenseSet<Region *> &TrueBiasedRegionsGlobal,
+ DenseSet<Region *> &FalseBiasedRegionsGlobal,
+ DenseMap<Region *, BranchProbability> &BranchBiasMap) {
+ if (!BI->isConditional())
+ return false;
+ BranchProbability ThenProb, ElseProb;
+ if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
+ ThenProb, ElseProb))
+ return false;
+ BasicBlock *IfThen = BI->getSuccessor(0);
+ BasicBlock *IfElse = BI->getSuccessor(1);
+ assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
+ IfThen != IfElse &&
+ "Invariant from findScopes");
+ if (IfThen == R->getExit()) {
+ // Swap them so that IfThen/ThenProb means going into the conditional code
+ // and IfElse/ElseProb means skipping it.
+ std::swap(IfThen, IfElse);
+ std::swap(ThenProb, ElseProb);
+ }
+ CHR_DEBUG(dbgs() << "BI " << *BI << " ");
+ CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
+ CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
+ return CheckBias(R, ThenProb, ElseProb,
+ TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+}
+
+// Returns true and insert a select into the right biased set and the map if the
+// select is biased.
+static bool CheckBiasedSelect(
+ SelectInst *SI, Region *R,
+ DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
+ DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
+ DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
+ BranchProbability TrueProb, FalseProb;
+ if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
+ TrueProb, FalseProb))
+ return false;
+ CHR_DEBUG(dbgs() << "SI " << *SI << " ");
+ CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
+ CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
+ return CheckBias(SI, TrueProb, FalseProb,
+ TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
+ SelectBiasMap);
+}
+
+// Returns the instruction at which to hoist the dependent condition values and
+// insert the CHR branch for a region. This is the terminator branch in the
+// entry block or the first select in the entry block, if any.
+static Instruction* getBranchInsertPoint(RegInfo &RI) {
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ // The hoist point is by default the terminator of the entry block, which is
+ // the same as the branch instruction if RI.HasBranch is true.
+ Instruction *HoistPoint = EntryBB->getTerminator();
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ // Pick the first select in Selects in the entry block. Note Selects is
+ // sorted in the instruction order within a block (asserted below).
+ HoistPoint = SI;
+ break;
+ }
+ }
+ assert(HoistPoint && "Null HoistPoint");
+#ifndef NDEBUG
+ // Check that HoistPoint is the first one in Selects in the entry block,
+ // if any.
+ DenseSet<Instruction *> EntryBlockSelectSet;
+ for (SelectInst *SI : RI.Selects) {
+ if (SI->getParent() == EntryBB) {
+ EntryBlockSelectSet.insert(SI);
+ }
+ }
+ for (Instruction &I : *EntryBB) {
+ if (EntryBlockSelectSet.count(&I) > 0) {
+ assert(&I == HoistPoint &&
+ "HoistPoint must be the first one in Selects");
+ break;
+ }
+ }
+#endif
+ return HoistPoint;
+}
+
+// Find a CHR scope in the given region.
+CHRScope * CHR::findScope(Region *R) {
+ CHRScope *Result = nullptr;
+ BasicBlock *Entry = R->getEntry();
+ BasicBlock *Exit = R->getExit(); // null if top level.
+ assert(Entry && "Entry must not be null");
+ assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
+ "Only top level region has a null exit");
+ if (Entry)
+ CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Entry null\n");
+ if (Exit)
+ CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "Exit null\n");
+ // Exclude cases where Entry is part of a subregion (hence it doesn't belong
+ // to this region).
+ bool EntryInSubregion = RI.getRegionFor(Entry) != R;
+ if (EntryInSubregion)
+ return nullptr;
+ // Exclude loops
+ for (BasicBlock *Pred : predecessors(Entry))
+ if (R->contains(Pred))
+ return nullptr;
+ if (Exit) {
+ // Try to find an if-then block (check if R is an if-then).
+ // if (cond) {
+ // ...
+ // }
+ auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
+ if (BI)
+ CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
+ else
+ CHR_DEBUG(dbgs() << "BI null\n");
+ if (BI && BI->isConditional()) {
+ BasicBlock *S0 = BI->getSuccessor(0);
+ BasicBlock *S1 = BI->getSuccessor(1);
+ CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
+ CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
+ if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
+ RegInfo RI(R);
+ RI.HasBranch = CheckBiasedBranch(
+ BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
+ BranchBiasMap);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ CHR_DEBUG(dbgs() << "Found a region with a branch\n");
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ {
+ // Try to look for selects in the direct child blocks (as opposed to in
+ // subregions) of R.
+ // ...
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // if (..) { // Some subregion
+ // ...
+ // }
+ // ...
+ // a = cond ? b : c;
+ // ...
+ SmallVector<SelectInst *, 8> Selects;
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
+ continue;
+ // This returns the basic block of E if E is a direct child of R (not a
+ // subregion.)
+ BasicBlock *BB = E->getEntry();
+ // Need to push in the order to make it easier to find the first Select
+ // later.
+ for (Instruction &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ Selects.push_back(SI);
+ ++Stats.NumBranches;
+ }
+ }
+ }
+ if (Selects.size() > 0) {
+ auto AddSelects = [&](RegInfo &RI) {
+ for (auto *SI : Selects)
+ if (CheckBiasedSelect(SI, RI.R,
+ TrueBiasedSelectsGlobal,
+ FalseBiasedSelectsGlobal,
+ SelectBiasMap))
+ RI.Selects.push_back(SI);
+ };
+ if (!Result) {
+ CHR_DEBUG(dbgs() << "Found a select-only region\n");
+ RegInfo RI(R);
+ AddSelects(RI);
+ Result = new CHRScope(RI);
+ Scopes.insert(Result);
+ } else {
+ CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
+ AddSelects(Result->RegInfos[0]);
+ }
+ }
+ }
+
+ if (Result) {
+ checkScopeHoistable(Result);
+ }
+ return Result;
+}
+
+// Check that any of the branch and the selects in the region could be
+// hoisted above the the CHR branch insert point (the most dominating of
+// them, either the branch (at the end of the first block) or the first
+// select in the first block). If the branch can't be hoisted, drop the
+// selects in the first blocks.
+//
+// For example, for the following scope/region with selects, we want to insert
+// the merged branch right before the first select in the first/entry block by
+// hoisting c1, c2, c3, and c4.
+//
+// // Branch insert point here.
+// a = c1 ? b : c; // Select 1
+// d = c2 ? e : f; // Select 2
+// if (c3) { // Branch
+// ...
+// c4 = foo() // A call.
+// g = c4 ? h : i; // Select 3
+// }
+//
+// But suppose we can't hoist c4 because it's dependent on the preceding
+// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
+// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
+void CHR::checkScopeHoistable(CHRScope *Scope) {
+ RegInfo &RI = Scope->RegInfos[0];
+ Region *R = RI.R;
+ BasicBlock *EntryBB = R->getEntry();
+ auto *Branch = RI.HasBranch ?
+ cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
+ SmallVector<SelectInst *, 8> &Selects = RI.Selects;
+ if (RI.HasBranch || !Selects.empty()) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ // Avoid a data dependence from a select or a branch to a(nother)
+ // select. Note no instruction can't data-depend on a branch (a branch
+ // instruction doesn't produce a value).
+ DenseSet<Instruction *> Unhoistables;
+ // Initialize Unhoistables with the selects.
+ for (SelectInst *SI : Selects) {
+ Unhoistables.insert(SI);
+ }
+ // Remove Selects that can't be hoisted.
+ for (auto it = Selects.begin(); it != Selects.end(); ) {
+ SelectInst *SI = *it;
+ if (SI == InsertPoint) {
+ ++it;
+ continue;
+ }
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
+ it = Selects.erase(it);
+ // Since we are dropping the select here, we also drop it from
+ // Unhoistables.
+ Unhoistables.erase(SI);
+ } else
+ ++it;
+ }
+ // Update InsertPoint after potentially removing selects.
+ InsertPoint = getBranchInsertPoint(RI);
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+ if (RI.HasBranch && InsertPoint != Branch) {
+ bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr);
+ if (!IsHoistable) {
+ // If the branch isn't hoistable, drop the selects in the entry
+ // block, preferring the branch, which makes the branch the hoist
+ // point.
+ assert(InsertPoint != Branch && "Branch must not be the hoist point");
+ CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
+ CHR_DEBUG(
+ for (SelectInst *SI : Selects) {
+ dbgs() << "SI " << *SI << "\n";
+ });
+ Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
+ [EntryBB](SelectInst *SI) {
+ return SI->getParent() == EntryBB;
+ }), Selects.end());
+ Unhoistables.clear();
+ InsertPoint = Branch;
+ }
+ }
+ CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
+#ifndef NDEBUG
+ if (RI.HasBranch) {
+ assert(!DT.dominates(Branch, InsertPoint) &&
+ "Branch can't be already above the hoist point");
+ assert(checkHoistValue(Branch->getCondition(), InsertPoint,
+ DT, Unhoistables, nullptr) &&
+ "checkHoistValue for branch");
+ }
+ for (auto *SI : Selects) {
+ assert(!DT.dominates(SI, InsertPoint) &&
+ "SI can't be already above the hoist point");
+ assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, nullptr) &&
+ "checkHoistValue for selects");
+ }
+ CHR_DEBUG(dbgs() << "Result\n");
+ if (RI.HasBranch) {
+ CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
+ }
+ for (auto *SI : Selects) {
+ CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
+ }
+#endif
+ }
+}
+
+// Traverse the region tree, find all nested scopes and merge them if possible.
+CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
+ SmallVectorImpl<CHRScope *> &Scopes) {
+ CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
+ CHRScope *Result = findScope(R);
+ // Visit subscopes.
+ CHRScope *ConsecutiveSubscope = nullptr;
+ SmallVector<CHRScope *, 8> Subscopes;
+ for (auto It = R->begin(); It != R->end(); ++It) {
+ const std::unique_ptr<Region> &SubR = *It;
+ auto Next_It = std::next(It);
+ Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
+ CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
+ << "\n");
+ CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
+ if (SubCHRScope) {
+ CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
+ } else {
+ CHR_DEBUG(dbgs() << "Subregion Scope null\n");
+ }
+ if (SubCHRScope) {
+ if (!ConsecutiveSubscope)
+ ConsecutiveSubscope = SubCHRScope;
+ else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ ConsecutiveSubscope = SubCHRScope;
+ } else
+ ConsecutiveSubscope->append(SubCHRScope);
+ } else {
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ ConsecutiveSubscope = nullptr;
+ }
+ }
+ if (ConsecutiveSubscope) {
+ Subscopes.push_back(ConsecutiveSubscope);
+ }
+ for (CHRScope *Sub : Subscopes) {
+ if (Result) {
+ // Combine it with the parent.
+ Result->addSub(Sub);
+ } else {
+ // Push Subscopes as they won't be combined with the parent.
+ Scopes.push_back(Sub);
+ }
+ }
+ return Result;
+}
+
+static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
+ DenseSet<Value *> ConditionValues;
+ if (RI.HasBranch) {
+ auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
+ ConditionValues.insert(BI->getCondition());
+ }
+ for (SelectInst *SI : RI.Selects) {
+ ConditionValues.insert(SI->getCondition());
+ }
+ return ConditionValues;
+}
+
+
+// Determine whether to split a scope depending on the sets of the branch
+// condition values of the previous region and the current region. We split
+// (return true) it if 1) the condition values of the inner/lower scope can't be
+// hoisted up to the outer/upper scope, or 2) the two sets of the condition
+// values have an empty intersection (because the combined branch conditions
+// won't probably lead to a simpler combined condition).
+static bool shouldSplit(Instruction *InsertPoint,
+ DenseSet<Value *> &PrevConditionValues,
+ DenseSet<Value *> &ConditionValues,
+ DominatorTree &DT,
+ DenseSet<Instruction *> &Unhoistables) {
+ CHR_DEBUG(
+ dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
+ for (Value *V : PrevConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ assert(InsertPoint && "Null InsertPoint");
+ // If any of Bases isn't hoistable to the hoist point, split.
+ for (Value *V : ConditionValues) {
+ if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
+ CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
+ return true; // Not hoistable, split.
+ }
+ }
+ // If PrevConditionValues or ConditionValues is empty, don't split to avoid
+ // unnecessary splits at scopes with no branch/selects. If
+ // PrevConditionValues and ConditionValues don't intersect at all, split.
+ if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
+ // Use std::set as DenseSet doesn't work with set_intersection.
+ std::set<Value *> PrevBases, Bases;
+ for (Value *V : PrevConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ PrevBases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ for (Value *V : ConditionValues) {
+ std::set<Value *> BaseValues = getBaseValues(V, DT);
+ Bases.insert(BaseValues.begin(), BaseValues.end());
+ }
+ CHR_DEBUG(
+ dbgs() << "PrevBases ";
+ for (Value *V : PrevBases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << " Bases ";
+ for (Value *V : Bases) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ std::set<Value *> Intersection;
+ std::set_intersection(PrevBases.begin(), PrevBases.end(),
+ Bases.begin(), Bases.end(),
+ std::inserter(Intersection, Intersection.begin()));
+ if (Intersection.empty()) {
+ // Empty intersection, split.
+ CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
+ return true;
+ }
+ }
+ CHR_DEBUG(dbgs() << "No split\n");
+ return false; // Don't split.
+}
+
+static void GetSelectsInScope(CHRScope *Scope,
+ DenseSet<Instruction *> &Output) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Output.insert(SI);
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ GetSelectsInScope(Sub, Output);
+ }
+}
+
+void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(!Scope->BranchInsertPoint &&
+ "BranchInsertPoint must not be set");
+ DenseSet<Instruction *> Unhoistables;
+ GetSelectsInScope(Scope, Unhoistables);
+ splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
+ }
+#ifndef NDEBUG
+ for (CHRScope *Scope : Output) {
+ assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
+ }
+#endif
+}
+
+SmallVector<CHRScope *, 8> CHR::splitScope(
+ CHRScope *Scope,
+ CHRScope *Outer,
+ DenseSet<Value *> *OuterConditionValues,
+ Instruction *OuterInsertPoint,
+ SmallVectorImpl<CHRScope *> &Output,
+ DenseSet<Instruction *> &Unhoistables) {
+ if (Outer) {
+ assert(OuterConditionValues && "Null OuterConditionValues");
+ assert(OuterInsertPoint && "Null OuterInsertPoint");
+ }
+ bool PrevSplitFromOuter = true;
+ DenseSet<Value *> PrevConditionValues;
+ Instruction *PrevInsertPoint = nullptr;
+ SmallVector<CHRScope *, 8> Splits;
+ SmallVector<bool, 8> SplitsSplitFromOuter;
+ SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
+ SmallVector<Instruction *, 8> SplitsInsertPoints;
+ SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
+ for (RegInfo &RI : RegInfos) {
+ Instruction *InsertPoint = getBranchInsertPoint(RI);
+ DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
+ CHR_DEBUG(
+ dbgs() << "ConditionValues ";
+ for (Value *V : ConditionValues) {
+ dbgs() << *V << ", ";
+ }
+ dbgs() << "\n");
+ if (RI.R == RegInfos[0].R) {
+ // First iteration. Check to see if we should split from the outer.
+ if (Outer) {
+ CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
+ CHR_DEBUG(dbgs() << "Should split from outer at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
+ ConditionValues, DT, Unhoistables)) {
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ } else {
+ // Not splitting from the outer. Use the outer bases and insert
+ // point. Union the bases.
+ PrevSplitFromOuter = false;
+ PrevConditionValues = *OuterConditionValues;
+ PrevConditionValues.insert(ConditionValues.begin(),
+ ConditionValues.end());
+ PrevInsertPoint = OuterInsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Outer null\n");
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ }
+ } else {
+ CHR_DEBUG(dbgs() << "Should split from prev at "
+ << RI.R->getNameStr() << "\n");
+ if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
+ DT, Unhoistables)) {
+ CHRScope *Tail = Scope->split(RI.R);
+ Scopes.insert(Tail);
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ Scope = Tail;
+ PrevConditionValues = ConditionValues;
+ PrevInsertPoint = InsertPoint;
+ PrevSplitFromOuter = true;
+ } else {
+ // Not splitting. Union the bases. Keep the hoist point.
+ PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
+ }
+ }
+ }
+ Splits.push_back(Scope);
+ SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
+ SplitsConditionValues.push_back(PrevConditionValues);
+ assert(PrevInsertPoint && "Null PrevInsertPoint");
+ SplitsInsertPoints.push_back(PrevInsertPoint);
+ assert(Splits.size() == SplitsConditionValues.size() &&
+ Splits.size() == SplitsSplitFromOuter.size() &&
+ Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
+ Instruction *SplitInsertPoint = SplitsInsertPoints[I];
+ SmallVector<CHRScope *, 8> NewSubs;
+ DenseSet<Instruction *> SplitUnhoistables;
+ GetSelectsInScope(Split, SplitUnhoistables);
+ for (CHRScope *Sub : Split->Subs) {
+ SmallVector<CHRScope *, 8> SubSplits = splitScope(
+ Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
+ SplitUnhoistables);
+ NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
+ }
+ Split->Subs = NewSubs;
+ }
+ SmallVector<CHRScope *, 8> Result;
+ for (size_t I = 0; I < Splits.size(); ++I) {
+ CHRScope *Split = Splits[I];
+ if (SplitsSplitFromOuter[I]) {
+ // Split from the outer.
+ Output.push_back(Split);
+ Split->BranchInsertPoint = SplitsInsertPoints[I];
+ CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
+ << "\n");
+ } else {
+ // Connected to the outer.
+ Result.push_back(Split);
+ }
+ }
+ if (!Outer)
+ assert(Result.empty() &&
+ "If no outer (top-level), must return no nested ones");
+ return Result;
+}
+
+void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
+ for (CHRScope *Scope : Scopes) {
+ assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
+ classifyBiasedScopes(Scope, Scope);
+ CHR_DEBUG(
+ dbgs() << "classifyBiasedScopes " << *Scope << "\n";
+ dbgs() << "TrueBiasedRegions ";
+ for (Region *R : Scope->TrueBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedRegions ";
+ for (Region *R : Scope->FalseBiasedRegions) {
+ dbgs() << R->getNameStr() << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "TrueBiasedSelects ";
+ for (SelectInst *SI : Scope->TrueBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";
+ dbgs() << "FalseBiasedSelects ";
+ for (SelectInst *SI : Scope->FalseBiasedSelects) {
+ dbgs() << *SI << ", ";
+ }
+ dbgs() << "\n";);
+ }
+}
+
+void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
+ for (RegInfo &RI : Scope->RegInfos) {
+ if (RI.HasBranch) {
+ Region *R = RI.R;
+ if (TrueBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->TrueBiasedRegions.insert(R);
+ else if (FalseBiasedRegionsGlobal.count(R) > 0)
+ OutermostScope->FalseBiasedRegions.insert(R);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ for (SelectInst *SI : RI.Selects) {
+ if (TrueBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->TrueBiasedSelects.insert(SI);
+ else if (FalseBiasedSelectsGlobal.count(SI) > 0)
+ OutermostScope->FalseBiasedSelects.insert(SI);
+ else
+ llvm_unreachable("Must be biased");
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs) {
+ classifyBiasedScopes(Sub, OutermostScope);
+ }
+}
+
+static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
+ unsigned NumBiased = Scope->TrueBiasedRegions.size() +
+ Scope->FalseBiasedRegions.size() +
+ Scope->TrueBiasedSelects.size() +
+ Scope->FalseBiasedSelects.size();
+ return NumBiased >= CHRMergeThreshold;
+}
+
+void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ // Filter out the ones with only one region and no subs.
+ if (!hasAtLeastTwoBiasedBranches(Scope)) {
+ CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
+ << Scope->TrueBiasedRegions.size()
+ << " falsy-regions " << Scope->FalseBiasedRegions.size()
+ << " true-selects " << Scope->TrueBiasedSelects.size()
+ << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
+ continue;
+ }
+ Output.push_back(Scope);
+ }
+}
+
+void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ for (CHRScope *Scope : Input) {
+ assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
+ "Empty");
+ setCHRRegions(Scope, Scope);
+ Output.push_back(Scope);
+ CHR_DEBUG(
+ dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
+ for (auto pair : Scope->HoistStopMap) {
+ Region *R = pair.first;
+ dbgs() << "Region " << R->getNameStr() << "\n";
+ for (Instruction *I : pair.second) {
+ dbgs() << "HoistStop " << *I << "\n";
+ }
+ }
+ dbgs() << "CHRRegions" << "\n";
+ for (RegInfo &RI : Scope->CHRRegions) {
+ dbgs() << RI.R->getNameStr() << "\n";
+ });
+ }
+}
+
+void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
+ DenseSet<Instruction *> Unhoistables;
+ // Put the biased selects in Unhoistables because they should stay where they
+ // are and constant-folded after CHR (in case one biased select or a branch
+ // can depend on another biased select.)
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (SelectInst *SI : RI.Selects) {
+ Unhoistables.insert(SI);
+ }
+ }
+ Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
+ for (RegInfo &RI : Scope->RegInfos) {
+ Region *R = RI.R;
+ DenseSet<Instruction *> HoistStops;
+ bool IsHoisted = false;
+ if (RI.HasBranch) {
+ assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
+ OutermostScope->FalseBiasedRegions.count(R) > 0) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
+ OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
+ "Must be true or false biased");
+ // Note checkHoistValue fills in HoistStops.
+ bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
+ Unhoistables, &HoistStops);
+ assert(IsHoistable && "Must be hoistable");
+ (void)(IsHoistable); // Unused in release build
+ IsHoisted = true;
+ }
+ if (IsHoisted) {
+ OutermostScope->CHRRegions.push_back(RI);
+ OutermostScope->HoistStopMap[R] = HoistStops;
+ }
+ }
+ for (CHRScope *Sub : Scope->Subs)
+ setCHRRegions(Sub, OutermostScope);
+}
+
+bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
+ return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
+}
+
+void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
+ SmallVectorImpl<CHRScope *> &Output) {
+ Output.resize(Input.size());
+ std::copy(Input.begin(), Input.end(), Output.begin());
+ std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
+}
+
+// Return true if V is already hoisted or was hoisted (along with its operands)
+// to the insert point.
+static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
+ HoistStopMapTy &HoistStopMap,
+ DenseSet<Instruction *> &HoistedSet,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ auto IT = HoistStopMap.find(R);
+ assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
+ DenseSet<Instruction *> &HoistStops = IT->second;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I == HoistPoint)
+ return;
+ if (HoistStops.count(I))
+ return;
+ if (auto *PN = dyn_cast<PHINode>(I))
+ if (TrivialPHIs.count(PN))
+ // The trivial phi inserted by the previous CHR scope could replace a
+ // non-phi in HoistStops. Note that since this phi is at the exit of a
+ // previous CHR scope, which dominates this scope, it's safe to stop
+ // hoisting there.
+ return;
+ if (HoistedSet.count(I))
+ // Already hoisted, return.
+ return;
+ assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
+ for (Value *Op : I->operands()) {
+ hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
+ }
+ I->moveBefore(HoistPoint);
+ HoistedSet.insert(I);
+ CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
+ }
+}
+
+// Hoist the dependent condition values of the branches and the selects in the
+// scope to the insert point.
+static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<Instruction *> HoistedSet;
+ for (const RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
+ HoistedSet, TrivialPHIs);
+ }
+ }
+}
+
+// Negate the predicate if an ICmp if it's used only by branches or selects by
+// swapping the operands of the branches or the selects. Returns true if success.
+static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
+ Instruction *ExcludedUser,
+ CHRScope *Scope) {
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
+ continue;
+ return false;
+ }
+ for (User *U : ICmp->users()) {
+ if (U == ExcludedUser)
+ continue;
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ // Don't need to swap this in terms of
+ // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
+ // mean whehter the branch is likely go into the if-then rather than
+ // successor0/successor1 and because we can tell which edge is the then or
+ // the else one by comparing the destination to the region exit block.
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap operands
+ Value *TrueValue = SI->getTrueValue();
+ Value *FalseValue = SI->getFalseValue();
+ SI->setTrueValue(FalseValue);
+ SI->setFalseValue(TrueValue);
+ SI->swapProfMetadata();
+ if (Scope->TrueBiasedSelects.count(SI)) {
+ assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->FalseBiasedSelects.insert(SI);
+ } else if (Scope->FalseBiasedSelects.count(SI)) {
+ assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ "Must not be already in");
+ Scope->TrueBiasedSelects.insert(SI);
+ }
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
+ return true;
+}
+
+// A helper for transformScopes. Insert a trivial phi at the scope exit block
+// for a value that's defined in the scope but used outside it (meaning it's
+// alive at the exit block).
+static void insertTrivialPHIs(CHRScope *Scope,
+ BasicBlock *EntryBlock, BasicBlock *ExitBlock,
+ DenseSet<PHINode *> &TrivialPHIs) {
+ DenseSet<BasicBlock *> BlocksInScopeSet;
+ SmallVector<BasicBlock *, 8> BlocksInScopeVec;
+ for (RegInfo &RI : Scope->RegInfos) {
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ BlocksInScopeSet.insert(BB);
+ BlocksInScopeVec.push_back(BB);
+ }
+ }
+ CHR_DEBUG(
+ dbgs() << "Inserting redudant phis\n";
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ dbgs() << "BlockInScope " << BB->getName() << "\n";
+ });
+ for (BasicBlock *BB : BlocksInScopeVec) {
+ for (Instruction &I : *BB) {
+ SmallVector<Instruction *, 8> Users;
+ for (User *U : I.users()) {
+ if (auto *UI = dyn_cast<Instruction>(U)) {
+ if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
+ // Unless there's already a phi for I at the exit block.
+ !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
+ Users.push_back(UI);
+ } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
+ // There's a loop backedge from a block that's dominated by this
+ // scope to the entry block.
+ CHR_DEBUG(dbgs() << "V " << I << "\n");
+ CHR_DEBUG(dbgs()
+ << "Used at entry block (for a back edge) by a phi user "
+ << *UI << "\n");
+ Users.push_back(UI);
+ }
+ }
+ }
+ if (Users.size() > 0) {
+ // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
+ // ExitBlock. Replace I with the new phi in UI unless UI is another
+ // phi at ExitBlock.
+ unsigned PredCount = std::distance(pred_begin(ExitBlock),
+ pred_end(ExitBlock));
+ PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
+ &ExitBlock->front());
+ for (BasicBlock *Pred : predecessors(ExitBlock)) {
+ PN->addIncoming(&I, Pred);
+ }
+ TrivialPHIs.insert(PN);
+ CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+ for (Instruction *UI : Users) {
+ for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
+ if (UI->getOperand(J) == &I) {
+ UI->setOperand(J, PN);
+ }
+ }
+ CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
+ }
+ }
+ }
+ }
+}
+
+// Assert that all the CHR regions of the scope have a biased branch or select.
+static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
+#ifndef NDEBUG
+ auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
+ if (Scope->TrueBiasedRegions.count(RI.R) ||
+ Scope->FalseBiasedRegions.count(RI.R))
+ return true;
+ for (SelectInst *SI : RI.Selects)
+ if (Scope->TrueBiasedSelects.count(SI) ||
+ Scope->FalseBiasedSelects.count(SI))
+ return true;
+ return false;
+ };
+ for (RegInfo &RI : Scope->CHRRegions) {
+ assert(HasBiasedBranchOrSelect(RI, Scope) &&
+ "Must have biased branch or select");
+ }
+#endif
+}
+
+// Assert that all the condition values of the biased branches and selects have
+// been hoisted to the pre-entry block or outside of the scope.
+static void assertBranchOrSelectConditionHoisted(CHRScope *Scope,
+ BasicBlock *PreEntryBlock) {
+ CHR_DEBUG(dbgs() << "Biased regions condition values \n");
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
+ if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ Value *V = BI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ for (SelectInst *SI : RI.Selects) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
+ if (!(IsTrueBiased || IsFalseBiased))
+ continue;
+ Value *V = SI->getCondition();
+ CHR_DEBUG(dbgs() << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ assert((I->getParent() == PreEntryBlock ||
+ !Scope->contains(I)) &&
+ "Must have been hoisted to PreEntryBlock or outside the scope");
+ }
+ }
+ }
+}
+
+void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
+ CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
+
+ assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
+ Region *FirstRegion = Scope->RegInfos[0].R;
+ BasicBlock *EntryBlock = FirstRegion->getEntry();
+ Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
+ BasicBlock *ExitBlock = LastRegion->getExit();
+ Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
+
+ if (ExitBlock) {
+ // Insert a trivial phi at the exit block (where the CHR hot path and the
+ // cold path merges) for a value that's defined in the scope but used
+ // outside it (meaning it's alive at the exit block). We will add the
+ // incoming values for the CHR cold paths to it below. Without this, we'd
+ // miss updating phi's for such values unless there happens to already be a
+ // phi for that value there.
+ insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ }
+
+ // Split the entry block of the first region. The new block becomes the new
+ // entry block of the first region. The old entry block becomes the block to
+ // insert the CHR branch into. Note DT gets updated. Since DT gets updated
+ // through the split, we update the entry of the first region after the split,
+ // and Region only points to the entry and the exit blocks, rather than
+ // keeping everything in a list or set, the blocks membership and the
+ // entry/exit blocks of the region are still valid after the split.
+ CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
+ << " at " << *Scope->BranchInsertPoint << "\n");
+ BasicBlock *NewEntryBlock =
+ SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ FirstRegion->replaceEntryRecursive(NewEntryBlock);
+ BasicBlock *PreEntryBlock = EntryBlock;
+
+ ValueToValueMapTy VMap;
+ // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
+ // hot path (originals) and a cold path (clones) and update the PHIs at the
+ // exit block.
+ cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
+
+ // Replace the old (placeholder) branch with the new (merged) conditional
+ // branch.
+ BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
+ NewEntryBlock, VMap);
+
+#ifndef NDEBUG
+ assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
+#endif
+
+ // Hoist the conditional values of the branches/selects.
+ hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
+
+#ifndef NDEBUG
+ assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
+#endif
+
+ // Create the combined branch condition and constant-fold the branches/selects
+ // in the hot path.
+ fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
+ ProfileCount ? ProfileCount.getValue() : 0);
+}
+
+// A helper for transformScopes. Clone the blocks in the scope (excluding the
+// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
+// at the exit block.
+void CHR::cloneScopeBlocks(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BasicBlock *ExitBlock,
+ Region *LastRegion,
+ ValueToValueMapTy &VMap) {
+ // Clone all the blocks. The original blocks will be the hot-path
+ // CHR-optimized code and the cloned blocks will be the original unoptimized
+ // code. This is so that the block pointers from the
+ // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
+ // which CHR should apply to.
+ SmallVector<BasicBlock*, 8> NewBlocks;
+ for (RegInfo &RI : Scope->RegInfos)
+ for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
+ // sub-Scopes.
+ assert(BB != PreEntryBlock && "Don't copy the preetntry block");
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
+ NewBlocks.push_back(NewBB);
+ VMap[BB] = NewBB;
+ }
+
+ // Place the cloned blocks right after the original blocks (right before the
+ // exit block of.)
+ if (ExitBlock)
+ F.getBasicBlockList().splice(ExitBlock->getIterator(),
+ F.getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F.end());
+
+ // Update the cloned blocks/instructions to refer to themselves.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
+ for (Instruction &I : *NewBlocks[i])
+ RemapInstruction(&I, VMap,
+ RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+
+ // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
+ // the top-level region but we don't need to add PHIs. The trivial PHIs
+ // inserted above will be updated here.
+ if (ExitBlock)
+ for (PHINode &PN : ExitBlock->phis())
+ for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
+ ++I) {
+ BasicBlock *Pred = PN.getIncomingBlock(I);
+ if (LastRegion->contains(Pred)) {
+ Value *V = PN.getIncomingValue(I);
+ auto It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
+ assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
+ PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
+ }
+ }
+}
+
+// A helper for transformScope. Replace the old (placeholder) branch with the
+// new (merged) conditional branch.
+BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
+ BasicBlock *EntryBlock,
+ BasicBlock *NewEntryBlock,
+ ValueToValueMapTy &VMap) {
+ BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
+ assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
+ "SplitBlock did not work correctly!");
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ assert(VMap.find(NewEntryBlock) != VMap.end() &&
+ "NewEntryBlock must have been copied");
+ OldBR->removeFromParent();
+ OldBR->dropAllReferences();
+ // The true predicate is a placeholder. It will be replaced later in
+ // fixupBranchesAndSelects().
+ BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
+ cast<BasicBlock>(VMap[NewEntryBlock]),
+ ConstantInt::getTrue(F.getContext()));
+ PreEntryBlock->getInstList().push_back(NewBR);
+ assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
+ "NewEntryBlock's only pred must be EntryBlock");
+ return NewBR;
+}
+
+// A helper for transformScopes. Create the combined branch condition and
+// constant-fold the branches/selects in the hot path.
+void CHR::fixupBranchesAndSelects(CHRScope *Scope,
+ BasicBlock *PreEntryBlock,
+ BranchInst *MergedBR,
+ uint64_t ProfileCount) {
+ Value *MergedCondition = ConstantInt::getTrue(F.getContext());
+ BranchProbability CHRBranchBias(1, 1);
+ uint64_t NumCHRedBranches = 0;
+ IRBuilder<> IRB(PreEntryBlock->getTerminator());
+ for (RegInfo &RI : Scope->CHRRegions) {
+ Region *R = RI.R;
+ if (RI.HasBranch) {
+ fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ for (SelectInst *SI : RI.Selects) {
+ fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
+ ++NumCHRedBranches;
+ }
+ }
+ Stats.NumBranchesDelta += NumCHRedBranches - 1;
+ Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
+ MergedBR->setCondition(MergedCondition);
+ SmallVector<uint32_t, 2> Weights;
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
+ Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
+ MDBuilder MDB(F.getContext());
+ MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
+ << "\n");
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a branch in the hot path.
+void CHR::fixupBranch(Region *R, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
+ assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
+ "Must be truthy or falsy");
+ auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
+ assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = BranchBiasMap[R];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ BasicBlock *IfThen = BI->getSuccessor(1);
+ BasicBlock *IfElse = BI->getSuccessor(0);
+ BasicBlock *RegionExitBlock = R->getExit();
+ assert(RegionExitBlock && "Null ExitBlock");
+ assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
+ IfThen != IfElse && "Invariant from findScopes");
+ if (IfThen == RegionExitBlock) {
+ // Swap them so that IfThen means going into it and IfElse means skipping
+ // it.
+ std::swap(IfThen, IfElse);
+ }
+ CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
+ << " IfElse " << IfElse->getName() << "\n");
+ Value *Cond = BI->getCondition();
+ BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
+ bool ConditionTrue = HotTarget == BI->getSuccessor(0);
+ addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
+ MergedCondition);
+ // Constant-fold the branch at ClonedEntryBlock.
+ assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
+ "The successor shouldn't change");
+ Value *NewCondition = ConditionTrue ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ BI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranchesAndSelects. Add to the combined branch condition
+// and constant-fold a select in the hot path.
+void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition,
+ BranchProbability &CHRBranchBias) {
+ bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
+ assert((IsTrueBiased ||
+ Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
+ assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
+ "Must be in the bias map");
+ BranchProbability Bias = SelectBiasMap[SI];
+ assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
+ // Take the min.
+ if (CHRBranchBias > Bias)
+ CHRBranchBias = Bias;
+ Value *Cond = SI->getCondition();
+ addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
+ MergedCondition);
+ Value *NewCondition = IsTrueBiased ?
+ ConstantInt::getTrue(F.getContext()) :
+ ConstantInt::getFalse(F.getContext());
+ SI->setCondition(NewCondition);
+}
+
+// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
+// condition.
+void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
+ Instruction *BranchOrSelect,
+ CHRScope *Scope,
+ IRBuilder<> &IRB,
+ Value *&MergedCondition) {
+ if (IsTrueBiased) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ } else {
+ // If Cond is an icmp and all users of V except for BranchOrSelect is a
+ // branch, negate the icmp predicate and swap the branch targets and avoid
+ // inserting an Xor to negate Cond.
+ bool Done = false;
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
+ if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
+ MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
+ Done = true;
+ }
+ if (!Done) {
+ Value *Negate = IRB.CreateXor(
+ ConstantInt::getTrue(F.getContext()), Cond);
+ MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
+ }
+ }
+}
+
+void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
+ unsigned i = 0;
+ (void)(i); // Unused in release build.
+ DenseSet<PHINode *> TrivialPHIs;
+ for (CHRScope *Scope : CHRScopes) {
+ transformScopes(Scope, TrivialPHIs);
+ CHR_DEBUG(
+ std::ostringstream oss;
+ oss << " after transformScopes " << i++;
+ dumpIR(F, oss.str().c_str(), nullptr));
+ }
+}
+
+static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) {
+ dbgs() << Label << " " << Scopes.size() << "\n";
+ for (CHRScope *Scope : Scopes) {
+ dbgs() << *Scope << "\n";
+ }
+}
+
+bool CHR::run() {
+ if (!shouldApply(F, PSI))
+ return false;
+
+ CHR_DEBUG(dumpIR(F, "before", nullptr));
+
+ bool Changed = false;
+ {
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Recursively traverse the region tree and find regions that have biased
+ // branches and/or selects and create scopes.
+ SmallVector<CHRScope *, 8> AllScopes;
+ findScopes(AllScopes);
+ CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
+
+ // Split the scopes if 1) the conditiona values of the biased
+ // branches/selects of the inner/lower scope can't be hoisted up to the
+ // outermost/uppermost scope entry, or 2) the condition values of the biased
+ // branches/selects in a scope (including subscopes) don't share at least
+ // one common value.
+ SmallVector<CHRScope *, 8> SplitScopes;
+ splitScopes(AllScopes, SplitScopes);
+ CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
+
+ // After splitting, set the biased regions and selects of a scope (a tree
+ // root) that include those of the subscopes.
+ classifyBiasedScopes(SplitScopes);
+ CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
+
+ // Filter out the scopes that has only one biased region or select (CHR
+ // isn't useful in such a case).
+ SmallVector<CHRScope *, 8> FilteredScopes;
+ filterScopes(SplitScopes, FilteredScopes);
+ CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
+
+ // Set the regions to be CHR'ed and their hoist stops for each scope.
+ SmallVector<CHRScope *, 8> SetScopes;
+ setCHRRegions(FilteredScopes, SetScopes);
+ CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
+
+ // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
+ // ones. We need to apply CHR from outer to inner so that we apply CHR only
+ // to the hot path, rather than both hot and cold paths.
+ SmallVector<CHRScope *, 8> SortedScopes;
+ sortScopes(SetScopes, SortedScopes);
+ CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
+
+ CHR_DEBUG(
+ dbgs() << "RegionInfo:\n";
+ RI.print(dbgs()));
+
+ // Apply the CHR transformation.
+ if (!SortedScopes.empty()) {
+ transformScopes(SortedScopes);
+ Changed = true;
+ }
+ }
+
+ if (Changed)
+ CHR_DEBUG(dumpIR(F, "after", &Stats));
+
+ return Changed;
+}
+
+bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ProfileSummaryInfo &PSI =
+ *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
+ return CHR(F, BFI, DT, PSI, RI).run();
+}
+
+namespace llvm {
+
+ControlHeightReductionPass::ControlHeightReductionPass() {
+ ParseCHRFilterFiles();
+}
+
+PreservedAnalyses ControlHeightReductionPass::run(
+ Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
+ bool Changed = CHR(F, BFI, DT, PSI, RI).run();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+} // namespace llvm
OpenPOWER on IntegriCloud