author    | Nicola Zaghen <nicola.zaghen@imgtec.com> | 2018-05-14 12:53:11 +0000
committer | Nicola Zaghen <nicola.zaghen@imgtec.com> | 2018-05-14 12:53:11 +0000
commit    | d34e60ca8532511acb8c93ef26297e349fbec86a
tree      | 1a095bc8694498d94232e81b95c1da05d462d3ec /llvm/lib/Transforms/Vectorize
parent    | affbc99bea94e77f7ebccd8ba887e33051bd04ee
Rename DEBUG macro to LLVM_DEBUG.
The DEBUG() macro name is very generic, so it can clash with macros defined by other projects.
The renaming was done as follows:
- git grep -l 'DEBUG' | xargs sed -i 's/\bDEBUG\s\?(/LLVM_DEBUG(/g'
- git diff -U0 master | ../clang/tools/clang-format/clang-format-diff.py -i -p1 -style LLVM
- Manual change to APInt
- Manually change the docs, as the regex doesn't match them.
During the transition period the DEBUG() macro is still present and is aliased
to LLVM_DEBUG().
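A minimal sketch of what that transition-period alias might look like (hypothetical; the actual definition lives in llvm/include/llvm/Support/Debug.h and its exact form may differ):

```cpp
// Hypothetical sketch of the transitional alias, assuming LLVM_DEBUG(X) is
// already defined in llvm/Support/Debug.h. Out-of-tree code that still uses
// DEBUG(...) keeps compiling while it migrates to LLVM_DEBUG(...).
#ifndef DEBUG
#define DEBUG(X) LLVM_DEBUG(X)
#endif
```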
Differential Revision: https://reviews.llvm.org/D43624
llvm-svn: 332240
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp       |  32
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp |  84
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp             | 253
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp             | 277
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlan.cpp                     |  14
5 files changed, 351 insertions, 309 deletions
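Both forms of the macro appear in the diff below: the single-expression form, LLVM_DEBUG(dbgs() << ...), and the block form that wraps several statements in braces. A representative block-form use, as it reads in LoadStoreVectorizer.cpp after the rename:

```cpp
// Block form: the entire statement list is compiled out in release (NDEBUG)
// builds, just like the single-expression form, and is only executed when
// debug output is enabled (e.g. with -debug).
LLVM_DEBUG({
  dbgs() << "LSV: Stores to vectorize:\n";
  for (Instruction *I : Chain)
    dbgs() << "  " << *I << "\n";
});
```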
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index c1d3f925515..a6acf3e558a 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -510,7 +510,7 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) { SmallVector<Instruction *, 16> ChainInstrs; bool IsLoadChain = isa<LoadInst>(Chain[0]); - DEBUG({ + LLVM_DEBUG({ for (Instruction *I : Chain) { if (IsLoadChain) assert(isa<LoadInst>(I) && @@ -532,11 +532,12 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) { Intrinsic::sideeffect) { // Ignore llvm.sideeffect calls. } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) { - DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n'); + LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I + << '\n'); break; } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) { - DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I - << '\n'); + LLVM_DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I + << '\n'); break; } } @@ -588,7 +589,7 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) { if (!AA.isNoAlias(MemoryLocation::get(MemInstr), MemoryLocation::get(ChainInstr))) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "LSV: Found alias:\n" " Aliasing instruction and pointer:\n" << " " << *MemInstr << '\n' @@ -744,7 +745,7 @@ bool Vectorizer::vectorizeChains(InstrListMap &Map) { if (Size < 2) continue; - DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n"); + LLVM_DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n"); // Process the stores in chunks of 64. for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) { @@ -758,7 +759,8 @@ bool Vectorizer::vectorizeChains(InstrListMap &Map) { } bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) { - DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n"); + LLVM_DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() + << " instructions.\n"); SmallVector<int, 16> Heads, Tails; int ConsecutiveChain[64]; @@ -894,14 +896,14 @@ bool Vectorizer::vectorizeStoreChain( // vector factor, break it into two pieces. unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy); if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) { - DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor." - " Creating two separate arrays.\n"); + LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor." + " Creating two separate arrays.\n"); return vectorizeStoreChain(Chain.slice(0, TargetVF), InstructionsProcessed) | vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed); } - DEBUG({ + LLVM_DEBUG({ dbgs() << "LSV: Stores to vectorize:\n"; for (Instruction *I : Chain) dbgs() << " " << *I << "\n"; @@ -1042,8 +1044,8 @@ bool Vectorizer::vectorizeLoadChain( // vector factor, break it into two pieces. unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy); if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) { - DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor." - " Creating two separate arrays.\n"); + LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor." 
+ " Creating two separate arrays.\n"); return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) | vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed); } @@ -1066,7 +1068,7 @@ bool Vectorizer::vectorizeLoadChain( Alignment = NewAlign; } - DEBUG({ + LLVM_DEBUG({ dbgs() << "LSV: Loads to vectorize:\n"; for (Instruction *I : Chain) I->dump(); @@ -1149,7 +1151,7 @@ bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace, bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(), SzInBytes * 8, AddressSpace, Alignment, &Fast); - DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows - << " and fast? " << Fast << "\n";); + LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows + << " and fast? " << Fast << "\n";); return !Allows || !Fast; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index d1fd2eb68a8..697bc1b448d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -98,26 +98,26 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool DisableInterleaving, // consider the loop to have been already vectorized because there's // nothing more that we can do. IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; - DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() - << "LV: Interleaving disabled by the pass manager\n"); + LLVM_DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs() + << "LV: Interleaving disabled by the pass manager\n"); } bool LoopVectorizeHints::allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { if (getForce() == LoopVectorizeHints::FK_Disabled) { - DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); emitRemarkWithHints(); return false; } if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { - DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); emitRemarkWithHints(); return false; } if (getIsVectorized() == 1) { - DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); // FIXME: Add interleave.disable metadata. This will allow // vectorize.disable to be used without disabling the pass and errors // to differentiate between disabled vectorization and a width of 1. @@ -223,7 +223,7 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { if (H->validate(Val)) H->Value = Val; else - DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); + LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); break; } } @@ -309,7 +309,7 @@ bool LoopVectorizationRequirements::doesNotMeet( << "loop not vectorized: cannot prove it is safe to reorder " "memory operations"; }); - DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); Failed = true; } @@ -350,7 +350,7 @@ static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { // 1. 
PHINode *IV = Lp->getCanonicalInductionVariable(); if (!IV) { - DEBUG(dbgs() << "LV: Canonical IV not found.\n"); + LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); return false; } @@ -358,14 +358,15 @@ static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { BasicBlock *Latch = Lp->getLoopLatch(); auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); if (!LatchBr || LatchBr->isUnconditional()) { - DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); + LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); return false; } // 3. auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); if (!LatchCmp) { - DEBUG(dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); + LLVM_DEBUG( + dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); return false; } @@ -374,7 +375,7 @@ static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { Value *IVUpdate = IV->getIncomingValueForBlock(Latch); if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { - DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); + LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); return false; } @@ -441,7 +442,7 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, Instruction *UI = cast<Instruction>(U); // This user may be a reduction exit value. if (!TheLoop->contains(UI)) { - DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); return true; } } @@ -474,7 +475,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { // not supported yet. auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); if (!Br) { - DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n"); + LLVM_DEBUG(dbgs() << "LV: Unsupported basic block terminator.\n"); ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) @@ -490,7 +491,7 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { !TheLoop->isLoopInvariant(Br->getCondition()) && !LI->isLoopHeader(Br->getSuccessor(0)) && !LI->isLoopHeader(Br->getSuccessor(1))) { - DEBUG(dbgs() << "LV: Unsupported conditional branch.\n"); + LLVM_DEBUG(dbgs() << "LV: Unsupported conditional branch.\n"); ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) @@ -504,8 +505,9 @@ bool LoopVectorizationLegality::canVectorizeOuterLoop() { // simple outer loops scenarios with uniform nested loops. 
if (!isUniformLoopNest(TheLoop /*loop nest*/, TheLoop /*context outer loop*/)) { - DEBUG(dbgs() - << "LV: Not vectorizing: Outer loop contains divergent loops.\n"); + LLVM_DEBUG( + dbgs() + << "LV: Not vectorizing: Outer loop contains divergent loops.\n"); ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) @@ -565,7 +567,7 @@ void LoopVectorizationLegality::addInductionPhi( AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); } - DEBUG(dbgs() << "LV: Found an induction variable.\n"); + LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); } bool LoopVectorizationLegality::canVectorizeInstrs() { @@ -587,7 +589,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { !PhiTy->isPointerTy()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) << "loop control flow is not understood by vectorizer"); - DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); + LLVM_DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; } @@ -609,7 +611,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (Phi->getNumIncomingValues() != 2) { ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi) << "control flow not understood by vectorizer"); - DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); + LLVM_DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } @@ -647,7 +649,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi) << "value that could not be identified as " "reduction is used outside the loop"); - DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n"); return false; } // end of PHI handling @@ -662,7 +664,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { ORE->emit(createMissedAnalysis("CantVectorizeCall", CI) << "call instruction cannot be vectorized"); - DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); + LLVM_DEBUG( + dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } @@ -674,7 +677,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI) << "intrinsic instruction cannot be vectorized"); - DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); + LLVM_DEBUG(dbgs() + << "LV: Found unvectorizable intrinsic " << *CI << "\n"); return false; } } @@ -686,7 +690,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { isa<ExtractElementInst>(I)) { ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I) << "instruction return type cannot be vectorized"); - DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); + LLVM_DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; } @@ -706,7 +710,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // semantics. 
} else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && !I.isFast()) { - DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); + LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); Hints->setPotentiallyUnsafe(); } @@ -721,7 +725,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } if (!PrimaryInduction) { - DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); + LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); if (Inductions.empty()) { ORE->emit(createMissedAnalysis("NoInductionVariable") << "loop induction variable could not be identified"); @@ -753,7 +757,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { if (LAI->hasStoreToLoopInvariantAddress()) { ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress") << "write to a loop invariant address could not be vectorized"); - DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); + LLVM_DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n"); return false; } @@ -903,7 +907,7 @@ bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!Lp->getLoopPreheader()) { - DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n"); + LLVM_DEBUG(dbgs() << "LV: Loop doesn't have a legal pre-header.\n"); ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); if (DoExtraAnalysis) @@ -989,8 +993,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { } // We need to have a loop header. - DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() - << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() + << '\n'); // Specific checks for outer loops. We skip the remaining legal checks at this // point because they don't support outer loops. @@ -998,13 +1002,13 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { assert(UseVPlanNativePath && "VPlan-native path is not enabled."); if (!canVectorizeOuterLoop()) { - DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Unsupported outer loop.\n"); // TODO: Implement DoExtraAnalysis when subsequent legal checks support // outer loops. return false; } - DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); + LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); return Result; } @@ -1012,7 +1016,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { - DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); + LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); if (DoExtraAnalysis) Result = false; else @@ -1021,7 +1025,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { - DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); + LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); if (DoExtraAnalysis) Result = false; else @@ -1030,18 +1034,18 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { // Go over each instruction and look at memory deps. 
if (!canVectorizeMemory()) { - DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); + LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); if (DoExtraAnalysis) Result = false; else return false; } - DEBUG(dbgs() << "LV: We can vectorize this loop" - << (LAI->getRuntimePointerChecking()->Need - ? " (with a runtime bound check)" - : "") - << "!\n"); + LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" + << (LAI->getRuntimePointerChecking()->Need + ? " (with a runtime bound check)" + : "") + << "!\n"); unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) @@ -1051,7 +1055,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks") << "Too many SCEV assumptions need to be made and checked " << "at runtime"); - DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); + LLVM_DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); if (DoExtraAnalysis) Result = false; else diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index eefaf22d028..a65dc09baa6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1628,20 +1628,20 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp, Function *Fn = OuterLp->getHeader()->getParent(); if (!Hints.allowVectorization(Fn, OuterLp, false /*AlwaysVectorize*/)) { - DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n"); + LLVM_DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n"); return false; } if (!Hints.getWidth()) { - DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n"); emitMissedWarning(Fn, OuterLp, Hints, ORE); return false; } if (Hints.getInterleave() > 1) { // TODO: Interleave support is future work. - DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " - "outer loops.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " + "outer loops.\n"); emitMissedWarning(Fn, OuterLp, Hints, ORE); return false; } @@ -4123,7 +4123,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { default: // This instruction is not vectorized by simple widening. - DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); + LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); llvm_unreachable("Unhandled instruction!"); } // end of switch. } @@ -4235,7 +4235,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { } for (auto *I : ScalarPtrs) if (!PossibleNonScalarPtrs.count(I)) { - DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); Worklist.insert(I); } @@ -4252,8 +4252,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { continue; Worklist.insert(Ind); Worklist.insert(IndUpdate); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate + << "\n"); } // Insert the forced scalars. 
@@ -4280,7 +4281,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { isScalarUse(J, Src)); })) { Worklist.insert(Src); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n"); } } @@ -4320,8 +4321,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // The induction variable and its update instruction will remain scalar. Worklist.insert(Ind); Worklist.insert(IndUpdate); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate + << "\n"); } Scalars[VF].insert(Worklist.begin(), Worklist.end()); @@ -4413,7 +4415,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) { Worklist.insert(Cmp); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); } // Holds consecutive and consecutive-like pointers. Consecutive-like pointers @@ -4474,7 +4476,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // aren't also identified as possibly non-uniform. for (auto *V : ConsecutiveLikePtrs) if (!PossibleNonUniformPtrs.count(V)) { - DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); Worklist.insert(V); } @@ -4497,7 +4499,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { isUniformDecision(J, VF)); })) { Worklist.insert(OI); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); } } } @@ -4542,8 +4544,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // The induction variable and its update instruction will remain uniform. Worklist.insert(Ind); Worklist.insert(IndUpdate); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate + << "\n"); } Uniforms[VF].insert(Worklist.begin(), Worklist.end()); @@ -4630,7 +4633,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses( // with other accesses that may precede it in program order. Note that a // bottom-up order does not imply that WAW dependences should not be checked. void InterleavedAccessInfo::analyzeInterleaving() { - DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); + LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); const ValueToValueMap &Strides = LAI->getSymbolicStrides(); // Holds all accesses with a constant stride. 
@@ -4672,7 +4675,8 @@ void InterleavedAccessInfo::analyzeInterleaving() { if (isStrided(DesB.Stride)) { Group = getInterleaveGroup(B); if (!Group) { - DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); + LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B + << '\n'); Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); } if (B->mayWriteToMemory()) @@ -4775,8 +4779,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { // Try to insert A into B's group. if (Group->insertMember(A, IndexA, DesA.Align)) { - DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' - << " into the interleave group with" << *B << '\n'); + LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' + << " into the interleave group with" << *B + << '\n'); InterleaveGroupMap[A] = Group; // Set the first load in program order as the insert position. @@ -4789,8 +4794,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { // Remove interleaved store groups with gaps. for (InterleaveGroup *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due " - "to gaps.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved store group due " + "to gaps.\n"); releaseGroup(Group); } // Remove interleaved groups with gaps (currently only loads) whose memory @@ -4822,8 +4828,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0)); if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " - "first group member potentially pointer-wrapping.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved group due to " + "first group member potentially pointer-wrapping.\n"); releaseGroup(Group); continue; } @@ -4832,8 +4839,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { Value *LastMemberPtr = getLoadStorePointerOperand(LastMember); if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " - "last group member potentially pointer-wrapping.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved group due to " + "last group member potentially pointer-wrapping.\n"); releaseGroup(Group); } } else { @@ -4843,12 +4851,14 @@ void InterleavedAccessInfo::analyzeInterleaving() { // to look for a member at index factor - 1, since every group must have // a member at index zero. if (Group->isReverse()) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " - "a reverse access with gaps.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved group due to " + "a reverse access with gaps.\n"); releaseGroup(Group); continue; } - DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); + LLVM_DEBUG( + dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); RequiresScalarEpilogue = true; } } @@ -4858,7 +4868,8 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { // TODO: It may by useful to do since it's still likely to be dynamically // uniform if the target can skip. 
- DEBUG(dbgs() << "LV: Not inserting runtime ptr check for divergent target"); + LLVM_DEBUG( + dbgs() << "LV: Not inserting runtime ptr check for divergent target"); ORE->emit( createMissedAnalysis("CantVersionLoopWithDivergentTarget") @@ -4876,20 +4887,22 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { << "runtime pointer checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " "compiling with -Os/-Oz"); - DEBUG(dbgs() - << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); + LLVM_DEBUG( + dbgs() + << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return None; } // If we optimize the program for size, avoid creating the tail loop. - DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); // If we don't know the precise trip count, don't try to vectorize. if (TC < 2) { ORE->emit( createMissedAnalysis("UnknownLoopCountComplexCFG") << "unable to calculate the loop count due to complex control flow"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); + LLVM_DEBUG( + dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } @@ -4907,7 +4920,8 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { "same time. Enable vectorization of this loop " "with '#pragma clang loop vectorize(enable)' " "when compiling with -Os/-Oz"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); + LLVM_DEBUG( + dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } @@ -4932,23 +4946,23 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, unsigned MaxVectorSize = WidestRegister / WidestType; - DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " - << WidestType << " bits.\n"); - DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister - << " bits.\n"); + LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType + << " / " << WidestType << " bits.\n"); + LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: " + << WidestRegister << " bits.\n"); assert(MaxVectorSize <= 256 && "Did not expect to pack so many elements" " into one vector!"); if (MaxVectorSize == 0) { - DEBUG(dbgs() << "LV: The target has no vector registers.\n"); + LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n"); MaxVectorSize = 1; return MaxVectorSize; } else if (ConstTripCount && ConstTripCount < MaxVectorSize && isPowerOf2_32(ConstTripCount)) { // We need to clamp the VF to be the ConstTripCount. There is no point in // choosing a higher viable VF as done in the loop below. 
- DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " - << ConstTripCount << "\n"); + LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " + << ConstTripCount << "\n"); MaxVectorSize = ConstTripCount; return MaxVectorSize; } @@ -4977,8 +4991,8 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, } if (unsigned MinVF = TTI.getMinimumVF(SmallestType)) { if (MaxVF < MinVF) { - DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF - << ") with target's minimum: " << MinVF << '\n'); + LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF + << ") with target's minimum: " << MinVF << '\n'); MaxVF = MinVF; } } @@ -4991,7 +5005,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { float Cost = expectedCost(1).first; const float ScalarCost = Cost; unsigned Width = 1; - DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; // Ignore scalar width, because the user explicitly wants vectorization. @@ -5006,10 +5020,10 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { // the vector elements. VectorizationCostTy C = expectedCost(i); float VectorCost = C.first / (float)i; - DEBUG(dbgs() << "LV: Vector loop of width " << i - << " costs: " << (int)VectorCost << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i + << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { - DEBUG( + LLVM_DEBUG( dbgs() << "LV: Not considering vector loop of width " << i << " because it will not generate any vector instructions.\n"); continue; @@ -5023,15 +5037,16 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { if (!EnableCondStoresVectorization && NumPredStores) { ORE->emit(createMissedAnalysis("ConditionalStore") << "store that is conditionally executed prevents vectorization"); - DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); + LLVM_DEBUG( + dbgs() << "LV: No vectorization. There are conditional stores.\n"); Width = 1; Cost = ScalarCost; } - DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() - << "LV: Vectorization seems to be not beneficial, " - << "but was forced by a user.\n"); - DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); + LLVM_DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() + << "LV: Vectorization seems to be not beneficial, " + << "but was forced by a user.\n"); + LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); VectorizationFactor Factor = {Width, (unsigned)(Width * Cost)}; return Factor; } @@ -5123,8 +5138,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); - DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters - << " registers\n"); + LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers\n"); if (VF == 1) { if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) @@ -5182,7 +5197,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. 
if (VF > 1 && !Legal->getReductionVars()->empty()) { - DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); return IC; } @@ -5193,7 +5208,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // We want to interleave small loops in order to reduce the loop overhead and // potentially expose ILP opportunities. - DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'); + LLVM_DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'); if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) { // We assume that the cost overhead is 1 and we use the cost model // to estimate the cost of the loop and interleave until the cost of the @@ -5221,11 +5236,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, if (EnableLoadStoreRuntimeInterleave && std::max(StoresIC, LoadsIC) > SmallIC) { - DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n"); + LLVM_DEBUG( + dbgs() << "LV: Interleaving to saturate store or load ports.\n"); return std::max(StoresIC, LoadsIC); } - DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n"); return SmallIC; } @@ -5233,11 +5249,11 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // this point) that could benefit from interleaving. bool HasReductions = !Legal->getReductionVars()->empty(); if (TTI.enableAggressiveInterleaving(HasReductions)) { - DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); return IC; } - DEBUG(dbgs() << "LV: Not Interleaving.\n"); + LLVM_DEBUG(dbgs() << "LV: Not Interleaving.\n"); return 1; } @@ -5327,7 +5343,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { SmallVector<RegisterUsage, 8> RUs(VFs.size()); SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0); - DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); + LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); // A lambda that gets the register usage for the given type and VF. auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) { @@ -5372,8 +5388,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } - DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " - << OpenIntervals.size() << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " + << OpenIntervals.size() << '\n'); // Add the current instruction to the list of open intervals. 
OpenIntervals.insert(I); @@ -5388,9 +5404,10 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { Invariant += GetRegUsage(Inst->getType(), VFs[i]); } - DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); - DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant + << '\n'); RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = MaxUsages[i]; @@ -5587,8 +5604,9 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { BlockCost.first += C.first; BlockCost.second |= C.second; - DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF " - << VF << " For instruction: " << I << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first + << " for VF " << VF << " For instruction: " << I + << '\n'); } // If we are vectorizing a predicated block, it will have been @@ -6247,14 +6265,15 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize, assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); assert(UserVF && "Expected UserVF for outer loop vectorization."); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); - DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); buildVPlans(UserVF, UserVF); return {UserVF, 0}; } - DEBUG(dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the " - "VPlan-native path.\n"); + LLVM_DEBUG( + dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the " + "VPlan-native path.\n"); return NoVectorization; } @@ -6268,13 +6287,13 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { return NoVectorization; if (UserVF) { - DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. CM.selectUserVectorizationFactor(UserVF); buildVPlans(UserVF, UserVF); - DEBUG(printPlans(dbgs())); + LLVM_DEBUG(printPlans(dbgs())); return {UserVF, 0}; } @@ -6292,7 +6311,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { } buildVPlans(1, MaxVF); - DEBUG(printPlans(dbgs())); + LLVM_DEBUG(printPlans(dbgs())); if (MaxVF == 1) return NoVectorization; @@ -6301,7 +6320,8 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { } void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) { - DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF << '\n'); + LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF + << '\n'); BestVF = VF; BestUF = UF; @@ -6777,11 +6797,11 @@ VPBasicBlock *LoopVectorizationPlanner::handleReplication( // Finalize the recipe for Instr, first if it is not predicated. 
if (!IsPredicated) { - DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); + LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); VPBB->appendRecipe(Recipe); return VPBB; } - DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); + LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); assert(VPBB->getSuccessors().empty() && "VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. @@ -6906,8 +6926,9 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range, // should follow. auto SAIt = SinkAfter.find(Instr); if (SAIt != SinkAfter.end()) { - DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" << *SAIt->second - << " to vectorize a 1st order recurrence.\n"); + LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" + << *SAIt->second + << " to vectorize a 1st order recurrence.\n"); SinkAfterInverse[SAIt->second] = Instr; continue; } @@ -7208,21 +7229,22 @@ bool LoopVectorizePass::processLoop(Loop *L) { const std::string DebugLocStr = getDebugLocString(L); #endif /* NDEBUG */ - DEBUG(dbgs() << "\nLV: Checking a loop in \"" - << L->getHeader()->getParent()->getName() << "\" from " - << DebugLocStr << "\n"); + LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in \"" + << L->getHeader()->getParent()->getName() << "\" from " + << DebugLocStr << "\n"); LoopVectorizeHints Hints(L, DisableUnrolling, *ORE); - DEBUG(dbgs() << "LV: Loop hints:" - << " force=" - << (Hints.getForce() == LoopVectorizeHints::FK_Disabled - ? "disabled" - : (Hints.getForce() == LoopVectorizeHints::FK_Enabled - ? "enabled" - : "?")) - << " width=" << Hints.getWidth() - << " unroll=" << Hints.getInterleave() << "\n"); + LLVM_DEBUG( + dbgs() << "LV: Loop hints:" + << " force=" + << (Hints.getForce() == LoopVectorizeHints::FK_Disabled + ? "disabled" + : (Hints.getForce() == LoopVectorizeHints::FK_Enabled + ? "enabled" + : "?")) + << " width=" << Hints.getWidth() + << " unroll=" << Hints.getInterleave() << "\n"); // Function containing loop Function *F = L->getHeader()->getParent(); @@ -7236,7 +7258,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // benefit from vectorization, respectively. if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { - DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); + LLVM_DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); return false; } @@ -7247,7 +7269,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE, &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { - DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints, ORE); return false; } @@ -7297,13 +7319,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { } if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " - << "This loop is worth vectorizing only if no scalar " - << "iteration overheads are incurred."); + LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. 
" + << "This loop is worth vectorizing only if no scalar " + << "iteration overheads are incurred."); if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) - DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); // Loops with a very small trip count are considered for vectorization // under OptForSize, thereby making sure the cost of their loop body is // dominant, free of runtime guards and scalar iteration overheads. @@ -7316,8 +7338,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // an integer loop and the vector instructions selected are purely integer // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { - DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); + LLVM_DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" + "attribute is used.\n"); ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "NoImplicitFloat", L) << "loop not vectorized due to NoImplicitFloat attribute"); @@ -7331,7 +7353,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // additional fp-math flags can help. if (Hints.isPotentiallyUnsafe() && TTI->isFPVectorizationPotentiallyUnsafe()) { - DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); + LLVM_DEBUG( + dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); ORE->emit( createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) << "loop not vectorized due to unsafe FP support."); @@ -7375,14 +7398,14 @@ bool LoopVectorizePass::processLoop(Loop *L) { std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg; bool VectorizeLoop = true, InterleaveLoop = true; if (Requirements.doesNotMeet(F, L, Hints)) { - DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " - "requirements.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); emitMissedWarning(F, L, Hints, ORE); return false; } if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + LLVM_DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); VecDiagMsg = std::make_pair( "VectorizationNotBeneficial", "the cost-model indicates that vectorization is not beneficial"); @@ -7391,7 +7414,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (IC == 1 && UserIC <= 1) { // Tell the user interleaving is not beneficial. - DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); IntDiagMsg = std::make_pair( "InterleavingNotBeneficial", "the cost-model indicates that interleaving is not beneficial"); @@ -7403,8 +7426,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { } } else if (IC > 1 && UserIC == 1) { // Tell the user interleaving is beneficial, but it explicitly disabled. 
- DEBUG(dbgs() - << "LV: Interleaving is beneficial but is explicitly disabled."); + LLVM_DEBUG( + dbgs() << "LV: Interleaving is beneficial but is explicitly disabled."); IntDiagMsg = std::make_pair( "InterleavingBeneficialButDisabled", "the cost-model indicates that interleaving is beneficial " @@ -7431,24 +7454,24 @@ bool LoopVectorizePass::processLoop(Loop *L) { }); return false; } else if (!VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first, L->getStartLoc(), L->getHeader()) << VecDiagMsg.second; }); } else if (VectorizeLoop && !InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width + << ") in " << DebugLocStr << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first, L->getStartLoc(), L->getHeader()) << IntDiagMsg.second; }); } else if (VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width + << ") in " << DebugLocStr << '\n'); + LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); } LVP.setBestPlan(VF.Width, IC); @@ -7495,7 +7518,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Mark the loop as already vectorized to avoid vectorizing again. Hints.setAlreadyVectorized(); - DEBUG(verifyFunction(*L->getHeader()->getParent())); + LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent())); return true; } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 639a0525624..2f9fcc7ec1a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1059,7 +1059,7 @@ private: template <typename ReadyListType> void schedule(ScheduleData *SD, ReadyListType &ReadyList) { SD->IsScheduled = true; - DEBUG(dbgs() << "SLP: schedule " << *SD << "\n"); + LLVM_DEBUG(dbgs() << "SLP: schedule " << *SD << "\n"); ScheduleData *BundleMember = SD; while (BundleMember) { @@ -1082,8 +1082,8 @@ private: assert(!DepBundle->IsScheduled && "already scheduled bundle gets ready"); ReadyList.insert(DepBundle); - DEBUG(dbgs() - << "SLP: gets ready (def): " << *DepBundle << "\n"); + LLVM_DEBUG(dbgs() + << "SLP: gets ready (def): " << *DepBundle << "\n"); } }); } @@ -1096,8 +1096,8 @@ private: assert(!DepBundle->IsScheduled && "already scheduled bundle gets ready"); ReadyList.insert(DepBundle); - DEBUG(dbgs() << "SLP: gets ready (mem): " << *DepBundle - << "\n"); + LLVM_DEBUG(dbgs() + << "SLP: gets ready (mem): " << *DepBundle << "\n"); } } BundleMember = BundleMember->NextInBundle; @@ -1122,7 +1122,8 @@ private: doForAllOpcodes(I, [&](ScheduleData *SD) { if (SD->isSchedulingEntity() && SD->isReady()) { ReadyList.insert(SD); - DEBUG(dbgs() << "SLP: initially in ready list: " << *I << "\n"); + LLVM_DEBUG(dbgs() + << "SLP: initially in ready list: " << *I << "\n"); } }); } @@ -1398,12 +1399,12 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, // Check if the scalar is externally used as an extra arg. 
auto ExtI = ExternallyUsedValues.find(Scalar); if (ExtI != ExternallyUsedValues.end()) { - DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " << - Lane << " from " << *Scalar << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " + << Lane << " from " << *Scalar << ".\n"); ExternalUses.emplace_back(Scalar, nullptr, FoundLane); } for (User *U : Scalar->users()) { - DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n"); Instruction *UserInst = dyn_cast<Instruction>(U); if (!UserInst) @@ -1417,8 +1418,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, // be used. if (UseScalar != U || !InTreeUserNeedToExtract(Scalar, UserInst, TLI)) { - DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U - << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U + << ".\n"); assert(!UseEntry->NeedToGather && "Bad state"); continue; } @@ -1428,8 +1429,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, if (is_contained(UserIgnoreList, UserInst)) continue; - DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << - Lane << " from " << *Scalar << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " + << Lane << " from " << *Scalar << ".\n"); ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane)); } } @@ -1442,28 +1443,28 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, InstructionsState S = getSameOpcode(VL); if (Depth == RecursionMaxDepth) { - DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } // Don't handle vectors. if (S.OpValue->getType()->isVectorTy()) { - DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) if (SI->getValueOperand()->getType()->isVectorTy()) { - DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } // If all of the operands are identical or constant we have a simple solution. if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.Opcode) { - DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1474,8 +1475,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, // Don't vectorize ephemeral values. for (unsigned i = 0, e = VL.size(); i != e; ++i) { if (EphValues.count(VL[i])) { - DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << - ") is ephemeral.\n"); + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] + << ") is ephemeral.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1483,16 +1484,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, // Check if this is a duplicate of another entry. if (TreeEntry *E = getTreeEntry(S.OpValue)) { - DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); if (!E->isSame(VL)) { - DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } // Record the reuse of the tree node. 
FIXME, currently this is only used to // properly draw the graph rather than for the actual vectorization. E->UserTreeIndices.push_back(UserTreeIdx); - DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue + << ".\n"); return; } @@ -1502,8 +1504,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (!I) continue; if (getTreeEntry(I)) { - DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << - ") is already in tree.\n"); + LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] + << ") is already in tree.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1513,7 +1515,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, // we need to gather the scalars. for (unsigned i = 0, e = VL.size(); i != e; ++i) { if (MustGather.count(VL[i])) { - DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1527,7 +1529,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (!DT->isReachableFromEntry(BB)) { // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. - DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); + LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1545,9 +1547,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (UniqueValues.size() == VL.size()) { ReuseShuffleIndicies.clear(); } else { - DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); + LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) { - DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); + LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1561,14 +1563,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, BlockScheduling &BS = *BSRef.get(); if (!BS.tryScheduleBundle(VL, this, VL0)) { - DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); + LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); return; } - DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); + LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); unsigned ShuffleOrOp = S.IsAltShuffle ? 
(unsigned) Instruction::ShuffleVector : S.Opcode; @@ -1582,7 +1584,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, TerminatorInst *Term = dyn_cast<TerminatorInst>( cast<PHINode>(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i))); if (Term) { - DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); + LLVM_DEBUG( + dbgs() + << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); return; @@ -1590,7 +1594,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, } newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); - DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); + LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { ValueList Operands; @@ -1608,14 +1612,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, OrdersType CurrentOrder; bool Reuse = canReuseExtract(VL, VL0, CurrentOrder); if (Reuse) { - DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n"); + LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n"); ++NumOpsWantToKeepOriginalOrder; newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies); return; } if (!CurrentOrder.empty()) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "SLP: Reusing or shuffling of reordered extract sequence " "with order"; for (unsigned Idx : CurrentOrder) @@ -1631,7 +1635,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, StoredCurrentOrderAndNum->getFirst()); return; } - DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies); BS.cancelScheduling(VL, VL0); return; @@ -1649,7 +1653,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); - DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; } @@ -1662,7 +1666,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, if (!L->isSimple()) { BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); - DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); + LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; } *POIter = L->getPointerOperand(); @@ -1693,20 +1697,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth, ++NumOpsWantToKeepOriginalOrder; newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies); - DEBUG(dbgs() << "SLP: added a vector of loads.\n"); + LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder. 
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
++I->getSecond();
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies, I->getFirst());
- DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
return;
}
}
- DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -1729,12 +1733,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
+ LLVM_DEBUG(dbgs()
+ << "SLP: Gathering casts with different src types.\n");
return;
}
}
newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of casts.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -1757,13 +1762,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
+ LLVM_DEBUG(dbgs()
+ << "SLP: Gathering cmp with different predicate.\n");
return;
}
}
newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of compares.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
@@ -1795,7 +1801,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::Or:
case Instruction::Xor:
newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
@@ -1821,7 +1827,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
- DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -1834,7 +1840,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned j = 0; j < VL.size(); ++j) {
Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
if (Ty0 != CurTy) {
- DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+ LLVM_DEBUG(dbgs()
+ << "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -1845,8 +1852,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (unsigned j = 0; j < VL.size(); ++j) {
auto Op = cast<Instruction>(VL[j])->getOperand(1);
if (!isa<ConstantInt>(Op)) {
- DEBUG(
- dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+ LLVM_DEBUG(dbgs()
+ << "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
return;
@@ -1854,7 +1861,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -1871,12 +1878,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
+ LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
@@ -1894,7 +1901,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
+ LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
Function *Int = CI->getCalledFunction();
@@ -1908,8 +1915,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
- << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
+ << "\n");
return;
}
// ctlz,cttz and powi are special intrinsics whose second argument
@@ -1919,9 +1926,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (A1I != A1J) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
- << " argument "<< A1I<<"!=" << A1J
- << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+ << " argument " << A1I << "!=" << A1J << "\n");
return;
}
}
@@ -1932,8 +1938,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
- << *VL[i] << '\n');
+ LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
+ << *CI << "!=" << *VL[i] << '\n');
return;
}
}
@@ -1956,11 +1962,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!S.IsAltShuffle) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
+ LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+ LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
@@ -1984,7 +1990,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
default:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
- DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
+ LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
}
@@ -2411,9 +2417,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int VecCallCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF, VecTy->getNumElements());
- DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
- << " (" << VecCallCost << "-" << ScalarCallCost << ")"
- << " for " << *CI << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
+ << " (" << VecCallCost << "-" << ScalarCallCost << ")"
+ << " for " << *CI << "\n");
return ReuseShuffleCost + VecCallCost - ScalarCallCost;
}
@@ -2465,8 +2471,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
bool BoUpSLP::isFullyVectorizableTinyTree() {
- DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
- VectorizableTree.size() << " is fully vectorizable .\n");
+ LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
+ << VectorizableTree.size() << " is fully vectorizable .\n");
// We only handle trees of heights 1 and 2.
if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather)
@@ -2536,7 +2542,7 @@ int BoUpSLP::getSpillCost() {
LiveValues.insert(cast<Instruction>(&*J));
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "SLP: #LV: " << LiveValues.size();
for (auto *X : LiveValues)
dbgs() << " " << X->getName();
@@ -2575,8 +2581,8 @@ int BoUpSLP::getSpillCost() {
int BoUpSLP::getTreeCost() {
int Cost = 0;
- DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
- VectorizableTree.size() << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
+ << VectorizableTree.size() << ".\n");
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
@@ -2603,8 +2609,9 @@ int BoUpSLP::getTreeCost() {
continue;
int C = getEntryCost(&TE);
- DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
- << *TE.Scalars[0] << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for bundle that starts with " << *TE.Scalars[0]
+ << ".\n");
Cost += C;
}
@@ -2649,7 +2656,7 @@ int BoUpSLP::getTreeCost() {
<< "SLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n";
}
- DEBUG(dbgs() << Str);
+ LLVM_DEBUG(dbgs() << Str);
if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);
@@ -3080,7 +3087,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IRBuilder<>::InsertPointGuard Guard(Builder);
if (E->VectorizedValue) {
- DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;
}
@@ -3240,7 +3247,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *InVec = vectorizeTree(INVL);
if (E->VectorizedValue) {
- DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
@@ -3268,7 +3275,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *R = vectorizeTree(RHSV);
if (E->VectorizedValue) {
- DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
@@ -3303,7 +3310,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *False = vectorizeTree(FalseVec);
if (E->VectorizedValue) {
- DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
@@ -3351,7 +3358,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *RHS = vectorizeTree(RHSVL);
if (E->VectorizedValue) {
- DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
@@ -3509,7 +3516,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *OpVec = vectorizeTree(OpVL);
- DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
}
@@ -3547,7 +3554,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *RHS = vectorizeTree(RHSVL);
if (E->VectorizedValue) {
- DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
@@ -3627,7 +3634,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
VectorizableTree[0].VectorizedValue = Trunc;
}
- DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
+ LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
+ << " values .\n");
// If necessary, sign-extend or zero-extend ScalarRoot to the larger type
// specified by ScalarType.
@@ -3713,7 +3721,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
User->replaceUsesOfWith(Scalar, Ex);
}
- DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}
// For each vectorized value:
@@ -3734,7 +3742,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
if (!Ty->isVoidTy()) {
#ifndef NDEBUG
for (User *U : Scalar->users()) {
- DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to replace users in the ignorelist by undef.
assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
@@ -3744,7 +3752,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
Value *Undef = UndefValue::get(Ty);
Scalar->replaceAllUsesWith(Undef);
}
- DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
eraseInstruction(cast<Instruction>(Scalar));
}
}
@@ -3755,8 +3763,8 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
void BoUpSLP::optimizeGatherSequence() {
- DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
- << " gather sequences instructions.\n");
+ LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
+ << " gather sequences instructions.\n");
// LICM InsertElementInst sequences.
for (Instruction *I : GatherSeq) {
if (!isa<InsertElementInst>(I) && !isa<ShuffleVectorInst>(I))
@@ -3849,7 +3857,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
ScheduleData *PrevInBundle = nullptr;
ScheduleData *Bundle = nullptr;
bool ReSchedule = false;
- DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n");
// Make sure that the scheduling region contains all
// instructions of the bundle.
@@ -3866,8 +3874,8 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
// A bundle member was scheduled as single instruction before and now
// needs to be scheduled as part of the bundle. We just get rid of the
// existing schedule.
- DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember - << " was already scheduled\n"); + LLVM_DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember + << " was already scheduled\n"); ReSchedule = true; } assert(BundleMember->isSchedulingEntity() && @@ -3902,8 +3910,8 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, initialFillReadyList(ReadyInsts); } - DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " - << BB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " + << BB->getName() << "\n"); calculateDependencies(Bundle, true, SLP); @@ -3933,7 +3941,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL, return; ScheduleData *Bundle = getScheduleData(OpValue); - DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); + LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); assert(!Bundle->IsScheduled && "Can't cancel bundle which is already scheduled"); assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() && @@ -3992,7 +4000,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, if (isOneOf(OpValue, I) != I) CheckSheduleForI(I); assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); - DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n"); + LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n"); return true; } // Search up and down at the same time, because we don't know if the new @@ -4004,7 +4012,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, BasicBlock::iterator LowerEnd = BB->end(); while (true) { if (++ScheduleRegionSize > ScheduleRegionSizeLimit) { - DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n"); + LLVM_DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n"); return false; } @@ -4014,7 +4022,8 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, ScheduleStart = I; if (isOneOf(OpValue, I) != I) CheckSheduleForI(I); - DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n"); + LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I + << "\n"); return true; } UpIter++; @@ -4027,7 +4036,8 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, if (isOneOf(OpValue, I) != I) CheckSheduleForI(I); assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); - DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n"); + LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I + << "\n"); return true; } DownIter++; @@ -4091,7 +4101,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, assert(isInSchedulingRegion(BundleMember)); if (!BundleMember->hasValidDependencies()) { - DEBUG(dbgs() << "SLP: update deps of " << *BundleMember << "\n"); + LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember + << "\n"); BundleMember->Dependencies = 0; BundleMember->resetUnscheduledDeps(); @@ -4192,7 +4203,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, } if (InsertInReadyList && SD->isReady()) { ReadyInsts.push_back(SD); - DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst << "\n"); + LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst + << "\n"); } } } @@ -4215,7 +4227,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { if (!BS->ScheduleStart) return; - DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << 
"\n"); BS->resetSchedule(); @@ -4648,7 +4660,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, if (F.hasFnAttribute(Attribute::NoImplicitFloat)) return false; - DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); // Use the bottom up slp vectorizer to construct chains that start with // store instructions. @@ -4663,8 +4675,8 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Vectorize trees that end at stores. if (!Stores.empty()) { - DEBUG(dbgs() << "SLP: Found stores for " << Stores.size() - << " underlying objects.\n"); + LLVM_DEBUG(dbgs() << "SLP: Found stores for " << Stores.size() + << " underlying objects.\n"); Changed |= vectorizeStoreChains(R); } @@ -4675,16 +4687,16 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // is primarily intended to catch gather-like idioms ending at // non-consecutive loads. if (!GEPs.empty()) { - DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size() - << " underlying objects.\n"); + LLVM_DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size() + << " underlying objects.\n"); Changed |= vectorizeGEPIndices(BB, R); } } if (Changed) { R.optimizeGatherSequence(); - DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); - DEBUG(verifyFunction(F)); + LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n"); + LLVM_DEBUG(verifyFunction(F)); } return Changed; } @@ -4705,8 +4717,8 @@ static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, unsigned VecRegSize) { const unsigned ChainLen = Chain.size(); - DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen - << "\n"); + LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen + << "\n"); const unsigned Sz = R.getVectorElementSize(Chain[0]); const unsigned VF = VecRegSize / Sz; @@ -4724,8 +4736,8 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) continue; - DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i - << "\n"); + LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i + << "\n"); ArrayRef<Value *> Operands = Chain.slice(i, VF); R.buildTree(Operands); @@ -4736,9 +4748,10 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R, int Cost = R.getTreeCost(); - DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); + LLVM_DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF + << "\n"); if (Cost < -SLPCostThreshold) { - DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); using namespace ore; @@ -4883,8 +4896,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, if (VL.size() < 2) return false; - DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = " << VL.size() - << ".\n"); + LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = " + << VL.size() << ".\n"); // Check that all of the parts are scalar instructions of the same type. 
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
@@ -4969,8 +4982,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
continue;
- DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
- << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
R.buildTree(Ops);
@@ -4995,7 +5008,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
MinCost = std::min(MinCost, Cost);
if (Cost < -SLPCostThreshold) {
- DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
@@ -5752,8 +5765,8 @@ public:
break;
}
- DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
- << ". (HorRdx)\n");
+ LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
+ << Cost << ". (HorRdx)\n");
V.getORE()->emit([&]() {
return OptimizationRemark(
SV_NAME, "VectorizedHorizontalReduction", cast<Instruction>(VL[0]))
@@ -5874,11 +5887,11 @@ private:
}
ScalarReduxCost *= (ReduxWidth - 1);
- DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
- << " for reduction that starts with " << *FirstReducedVal
- << " (It is a "
- << (IsPairwiseReduction ? "pairwise" : "splitting")
- << " reduction)\n");
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
+ << " for reduction that starts with " << *FirstReducedVal
+ << " (It is a "
+ << (IsPairwiseReduction ? "pairwise" : "splitting")
+ << " reduction)\n");
return VecReduxCost - ScalarReduxCost;
}
@@ -6144,7 +6157,7 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
if (!findBuildAggregate(IVI, BuildVectorOpds))
return false;
- DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
+ LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in vector register, we need to
// extract scalars into scalar registers, so NeedExtraction is set true.
return tryToVectorizeList(BuildVectorOpds, R);
@@ -6234,8 +6247,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);
- DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts
- << ")\n");
+ LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
+ << NumElts << ")\n");
// The order in which the phi nodes appear in the program does not matter.
// So allow tryToVectorizeList to reorder them if it is beneficial. This
// is done when there are exactly two elements since tryToVectorizeList
@@ -6336,8 +6349,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
if (Entry.second.size() < 2)
continue;
- DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
- << Entry.second.size() << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
+ << Entry.second.size() << ".\n");
// We process the getelementptr list in chunks of 16 (like we do for
// stores) to minimize compile-time.
@@ -6419,8 +6432,8 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
if (it->second.size() < 2)
continue;
- DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
- << it->second.size() << ".\n");
+ LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
+ << it->second.size() << ".\n");
// Process the stores in chunks of 16.
// TODO: The limit of 16 inhibits greater vectorization factors.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 7146fcc098b..50c71a32385 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -116,7 +116,7 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
BasicBlock *PrevBB = CFG.PrevBB;
BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), PrevBB->getParent(), CFG.LastBB);
- DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
// Hook up the new basic block to its predecessors.
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -125,7 +125,7 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
- DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
if (isa<UnreachableInst>(PredBBTerminator)) {
assert(PredVPSuccessors.size() == 1 && "Predecessor ending w/o branch must have single successor.");
@@ -175,8 +175,8 @@ void VPBasicBlock::execute(VPTransformState *State) {
}
// 2. Fill the IR basic block with IR instructions.
- DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
- << " in BB:" << NewBB->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
+ << " in BB:" << NewBB->getName() << '\n');
State->CFG.VPBB2IRBB[this] = NewBB;
State->CFG.PrevVPBB = this;
@@ -184,7 +184,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
for (VPRecipeBase &Recipe : Recipes)
Recipe.execute(*State);
- DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
+ LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}
void VPRegionBlock::execute(VPTransformState *State) {
@@ -193,7 +193,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
if (!isReplicator()) {
// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
- DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
return;
@@ -210,7 +210,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
State->Instance->Lane = Lane;
// Visit the VPBlocks connected to \p this, starting from it.
for (VPBlockBase *Block : RPOT) {
- DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Block->execute(State);
}
}
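For context on the macro these hunks introduce: LLVM_DEBUG(X), from llvm/Support/Debug.h, compiles X away entirely in NDEBUG builds and otherwise runs it only when debug output is enabled (-debug, or -debug-only= with the file's DEBUG_TYPE). Below is a minimal usage sketch assuming only LLVM's Support headers; the pass name "example-pass" and the helper function are illustrative and not part of this change.

// Illustrative sketch only; "example-pass" and reportBundle are hypothetical.
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "example-pass"

static void reportBundle(unsigned NumValues) {
  // Single-statement form: compiled out when NDEBUG is defined; otherwise
  // printed only under -debug or -debug-only=example-pass.
  LLVM_DEBUG(dbgs() << "example-pass: analyzing " << NumValues
                    << " values.\n");

  // Block form, as used by several hunks above for multi-statement output.
  LLVM_DEBUG({
    dbgs() << "example-pass: values:";
    for (unsigned i = 0; i != NumValues; ++i)
      dbgs() << ' ' << i;
    dbgs() << '\n';
  });
}

Wrapping the whole stream statement in the macro, rather than guarding it with #ifndef NDEBUG, keeps the logging cost out of release builds while leaving the code readable, which is the pattern every hunk in this diff converges on.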