summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
authorDaniel Jasper <djasper@google.com>2016-12-19 08:22:17 +0000
committerDaniel Jasper <djasper@google.com>2016-12-19 08:22:17 +0000
commitaec2fa352f533d230ab50b6c3002a1a664c9d6c2 (patch)
tree21fa530583cde5282092391e6891d959208f28d9 /llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
parente5f3eba9c31f4d00c73f4714df52ffced4532927 (diff)
downloadbcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.tar.gz
bcm5719-llvm-aec2fa352f533d230ab50b6c3002a1a664c9d6c2.zip
Revert @llvm.assume with operator bundles (r289755-r289757)
This creates non-linear behavior in the inliner (see more details in r289755's commit thread). llvm-svn: 290086
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp49
1 files changed, 35 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index afcb50b2795..70269ed61f5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -369,12 +369,12 @@ public:
InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, unsigned VecWidth,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
- ORE(ORE), VF(VecWidth), UF(UnrollFactor),
+ AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Induction(nullptr),
OldInduction(nullptr), VectorLoopValueMap(UnrollFactor, VecWidth),
TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM),
@@ -706,6 +706,8 @@ protected:
const TargetLibraryInfo *TLI;
/// Target Transform Info.
const TargetTransformInfo *TTI;
+ /// Assumption Cache.
+ AssumptionCache *AC;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
@@ -788,11 +790,11 @@ public:
InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
+ const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM)
- : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, ORE, 1,
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
UnrollFactor, LVL, CM) {}
private:
@@ -1848,10 +1850,11 @@ public:
LoopInfo *LI, LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
const TargetLibraryInfo *TLI, DemandedBits *DB,
+ AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, const Function *F,
const LoopVectorizeHints *Hints)
: TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
- ORE(ORE), TheFunction(F), Hints(Hints) {}
+ AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}
/// Information about vectorization costs
struct VectorizationFactor {
@@ -2004,6 +2007,8 @@ public:
const TargetLibraryInfo *TLI;
/// Demanded bits analysis.
DemandedBits *DB;
+ /// Assumption cache.
+ AssumptionCache *AC;
/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
@@ -2117,6 +2122,7 @@ struct LoopVectorize : public FunctionPass {
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
@@ -2124,11 +2130,12 @@ struct LoopVectorize : public FunctionPass {
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
- return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, GetLAA,
- *ORE);
+ return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
+ GetLAA, *ORE);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addRequired<BlockFrequencyInfoWrapperPass>();
@@ -3056,6 +3063,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Add the cloned scalar to the scalar map entry.
Entry[Part][Lane] = Cloned;
+ // If we just cloned a new assumption, add it the assumption cache.
+ if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+
// End if-block.
if (IfPredicateInstr)
PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
@@ -7168,6 +7180,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
@@ -7196,7 +7209,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore ephemeral values.
- CodeMetrics::collectEphemeralValues(TheLoop, ValuesToIgnore);
+ CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
// Ignore type-promoting instructions we identified during reduction
// detection.
@@ -7270,6 +7283,11 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// Add the cloned scalar to the scalar map entry.
Entry[Part][0] = Cloned;
+ // If we just cloned a new assumption, add it the assumption cache.
+ if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+
// End if-block.
if (IfPredicateInstr)
PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
@@ -7409,7 +7427,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, ORE, F,
+ LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints);
CM.collectValuesToIgnore();
@@ -7545,7 +7563,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
- InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, ORE, IC, &LVL, &CM);
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
+ &CM);
Unroller.vectorize();
ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -7554,8 +7573,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< NV("InterleaveCount", IC) << ")");
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, ORE, VF.Width, IC, &LVL,
- &CM);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
+ &LVL, &CM);
LB.vectorize();
++LoopsVectorized;
@@ -7583,7 +7602,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool LoopVectorizePass::runImpl(
Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
- DemandedBits &DB_, AliasAnalysis &AA_,
+ DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
OptimizationRemarkEmitter &ORE_) {
@@ -7594,6 +7613,7 @@ bool LoopVectorizePass::runImpl(
BFI = &BFI_;
TLI = TLI_;
AA = &AA_;
+ AC = &AC_;
GetLAA = &GetLAA_;
DB = &DB_;
ORE = &ORE_;
@@ -7643,6 +7663,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F);
auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
@@ -7652,7 +7673,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
return LAM.getResult<LoopAccessAnalysis>(L);
};
bool Changed =
- runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, GetLAA, ORE);
+ runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA, ORE);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
OpenPOWER on IntegriCloud