diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 253 |
1 files changed, 138 insertions, 115 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index eefaf22d028..a65dc09baa6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1628,20 +1628,20 @@ static bool isExplicitVecOuterLoop(Loop *OuterLp, Function *Fn = OuterLp->getHeader()->getParent(); if (!Hints.allowVectorization(Fn, OuterLp, false /*AlwaysVectorize*/)) { - DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n"); + LLVM_DEBUG(dbgs() << "LV: Loop hints prevent outer loop vectorization.\n"); return false; } if (!Hints.getWidth()) { - DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No user vector width.\n"); emitMissedWarning(Fn, OuterLp, Hints, ORE); return false; } if (Hints.getInterleave() > 1) { // TODO: Interleave support is future work. - DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " - "outer loops.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Interleave is not supported for " + "outer loops.\n"); emitMissedWarning(Fn, OuterLp, Hints, ORE); return false; } @@ -4123,7 +4123,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { default: // This instruction is not vectorized by simple widening. - DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); + LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); llvm_unreachable("Unhandled instruction!"); } // end of switch. } @@ -4235,7 +4235,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { } for (auto *I : ScalarPtrs) if (!PossibleNonScalarPtrs.count(I)) { - DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); Worklist.insert(I); } @@ -4252,8 +4252,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { continue; Worklist.insert(Ind); Worklist.insert(IndUpdate); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate + << "\n"); } // Insert the forced scalars. @@ -4280,7 +4281,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { isScalarUse(J, Src)); })) { Worklist.insert(Src); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n"); } } @@ -4320,8 +4321,9 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // The induction variable and its update instruction will remain scalar. Worklist.insert(Ind); Worklist.insert(IndUpdate); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate + << "\n"); } Scalars[VF].insert(Worklist.begin(), Worklist.end()); @@ -4413,7 +4415,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0)); if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) { Worklist.insert(Cmp); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n"); } // Holds consecutive and consecutive-like pointers. Consecutive-like pointers @@ -4474,7 +4476,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // aren't also identified as possibly non-uniform. for (auto *V : ConsecutiveLikePtrs) if (!PossibleNonUniformPtrs.count(V)) { - DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n"); Worklist.insert(V); } @@ -4497,7 +4499,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { isUniformDecision(J, VF)); })) { Worklist.insert(OI); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n"); } } } @@ -4542,8 +4544,9 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // The induction variable and its update instruction will remain uniform. Worklist.insert(Ind); Worklist.insert(IndUpdate); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n"); + LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate + << "\n"); } Uniforms[VF].insert(Worklist.begin(), Worklist.end()); @@ -4630,7 +4633,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses( // with other accesses that may precede it in program order. Note that a // bottom-up order does not imply that WAW dependences should not be checked. void InterleavedAccessInfo::analyzeInterleaving() { - DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); + LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); const ValueToValueMap &Strides = LAI->getSymbolicStrides(); // Holds all accesses with a constant stride. @@ -4672,7 +4675,8 @@ void InterleavedAccessInfo::analyzeInterleaving() { if (isStrided(DesB.Stride)) { Group = getInterleaveGroup(B); if (!Group) { - DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B << '\n'); + LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B + << '\n'); Group = createInterleaveGroup(B, DesB.Stride, DesB.Align); } if (B->mayWriteToMemory()) @@ -4775,8 +4779,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { // Try to insert A into B's group. if (Group->insertMember(A, IndexA, DesA.Align)) { - DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' - << " into the interleave group with" << *B << '\n'); + LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n' + << " into the interleave group with" << *B + << '\n'); InterleaveGroupMap[A] = Group; // Set the first load in program order as the insert position. @@ -4789,8 +4794,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { // Remove interleaved store groups with gaps. for (InterleaveGroup *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved store group due " - "to gaps.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved store group due " + "to gaps.\n"); releaseGroup(Group); } // Remove interleaved groups with gaps (currently only loads) whose memory @@ -4822,8 +4828,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0)); if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " - "first group member potentially pointer-wrapping.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved group due to " + "first group member potentially pointer-wrapping.\n"); releaseGroup(Group); continue; } @@ -4832,8 +4839,9 @@ void InterleavedAccessInfo::analyzeInterleaving() { Value *LastMemberPtr = getLoadStorePointerOperand(LastMember); if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false, /*ShouldCheckWrap=*/true)) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " - "last group member potentially pointer-wrapping.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved group due to " + "last group member potentially pointer-wrapping.\n"); releaseGroup(Group); } } else { @@ -4843,12 +4851,14 @@ void InterleavedAccessInfo::analyzeInterleaving() { // to look for a member at index factor - 1, since every group must have // a member at index zero. if (Group->isReverse()) { - DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to " - "a reverse access with gaps.\n"); + LLVM_DEBUG( + dbgs() << "LV: Invalidate candidate interleaved group due to " + "a reverse access with gaps.\n"); releaseGroup(Group); continue; } - DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); + LLVM_DEBUG( + dbgs() << "LV: Interleaved group requires epilogue iteration.\n"); RequiresScalarEpilogue = true; } } @@ -4858,7 +4868,8 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) { // TODO: It may by useful to do since it's still likely to be dynamically // uniform if the target can skip. - DEBUG(dbgs() << "LV: Not inserting runtime ptr check for divergent target"); + LLVM_DEBUG( + dbgs() << "LV: Not inserting runtime ptr check for divergent target"); ORE->emit( createMissedAnalysis("CantVersionLoopWithDivergentTarget") @@ -4876,20 +4887,22 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { << "runtime pointer checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " "compiling with -Os/-Oz"); - DEBUG(dbgs() - << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); + LLVM_DEBUG( + dbgs() + << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return None; } // If we optimize the program for size, avoid creating the tail loop. - DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); // If we don't know the precise trip count, don't try to vectorize. if (TC < 2) { ORE->emit( createMissedAnalysis("UnknownLoopCountComplexCFG") << "unable to calculate the loop count due to complex control flow"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); + LLVM_DEBUG( + dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } @@ -4907,7 +4920,8 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { "same time. Enable vectorization of this loop " "with '#pragma clang loop vectorize(enable)' " "when compiling with -Os/-Oz"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); + LLVM_DEBUG( + dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return None; } @@ -4932,23 +4946,23 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, unsigned MaxVectorSize = WidestRegister / WidestType; - DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " - << WidestType << " bits.\n"); - DEBUG(dbgs() << "LV: The Widest register safe to use is: " << WidestRegister - << " bits.\n"); + LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType + << " / " << WidestType << " bits.\n"); + LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: " + << WidestRegister << " bits.\n"); assert(MaxVectorSize <= 256 && "Did not expect to pack so many elements" " into one vector!"); if (MaxVectorSize == 0) { - DEBUG(dbgs() << "LV: The target has no vector registers.\n"); + LLVM_DEBUG(dbgs() << "LV: The target has no vector registers.\n"); MaxVectorSize = 1; return MaxVectorSize; } else if (ConstTripCount && ConstTripCount < MaxVectorSize && isPowerOf2_32(ConstTripCount)) { // We need to clamp the VF to be the ConstTripCount. There is no point in // choosing a higher viable VF as done in the loop below. - DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " - << ConstTripCount << "\n"); + LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to the constant trip count: " + << ConstTripCount << "\n"); MaxVectorSize = ConstTripCount; return MaxVectorSize; } @@ -4977,8 +4991,8 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize, } if (unsigned MinVF = TTI.getMinimumVF(SmallestType)) { if (MaxVF < MinVF) { - DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF - << ") with target's minimum: " << MinVF << '\n'); + LLVM_DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF + << ") with target's minimum: " << MinVF << '\n'); MaxVF = MinVF; } } @@ -4991,7 +5005,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { float Cost = expectedCost(1).first; const float ScalarCost = Cost; unsigned Width = 1; - DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; // Ignore scalar width, because the user explicitly wants vectorization. @@ -5006,10 +5020,10 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { // the vector elements. VectorizationCostTy C = expectedCost(i); float VectorCost = C.first / (float)i; - DEBUG(dbgs() << "LV: Vector loop of width " << i - << " costs: " << (int)VectorCost << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i + << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { - DEBUG( + LLVM_DEBUG( dbgs() << "LV: Not considering vector loop of width " << i << " because it will not generate any vector instructions.\n"); continue; @@ -5023,15 +5037,16 @@ LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) { if (!EnableCondStoresVectorization && NumPredStores) { ORE->emit(createMissedAnalysis("ConditionalStore") << "store that is conditionally executed prevents vectorization"); - DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n"); + LLVM_DEBUG( + dbgs() << "LV: No vectorization. There are conditional stores.\n"); Width = 1; Cost = ScalarCost; } - DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() - << "LV: Vectorization seems to be not beneficial, " - << "but was forced by a user.\n"); - DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); + LLVM_DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs() + << "LV: Vectorization seems to be not beneficial, " + << "but was forced by a user.\n"); + LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n"); VectorizationFactor Factor = {Width, (unsigned)(Width * Cost)}; return Factor; } @@ -5123,8 +5138,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); - DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters - << " registers\n"); + LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters + << " registers\n"); if (VF == 1) { if (ForceTargetNumScalarRegs.getNumOccurrences() > 0) @@ -5182,7 +5197,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. if (VF > 1 && !Legal->getReductionVars()->empty()) { - DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); return IC; } @@ -5193,7 +5208,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // We want to interleave small loops in order to reduce the loop overhead and // potentially expose ILP opportunities. - DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'); + LLVM_DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'); if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) { // We assume that the cost overhead is 1 and we use the cost model // to estimate the cost of the loop and interleave until the cost of the @@ -5221,11 +5236,12 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, if (EnableLoadStoreRuntimeInterleave && std::max(StoresIC, LoadsIC) > SmallIC) { - DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n"); + LLVM_DEBUG( + dbgs() << "LV: Interleaving to saturate store or load ports.\n"); return std::max(StoresIC, LoadsIC); } - DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n"); return SmallIC; } @@ -5233,11 +5249,11 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // this point) that could benefit from interleaving. bool HasReductions = !Legal->getReductionVars()->empty(); if (TTI.enableAggressiveInterleaving(HasReductions)) { - DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); return IC; } - DEBUG(dbgs() << "LV: Not Interleaving.\n"); + LLVM_DEBUG(dbgs() << "LV: Not Interleaving.\n"); return 1; } @@ -5327,7 +5343,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { SmallVector<RegisterUsage, 8> RUs(VFs.size()); SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0); - DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); + LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); // A lambda that gets the register usage for the given type and VF. auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) { @@ -5372,8 +5388,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { MaxUsages[j] = std::max(MaxUsages[j], RegUsage); } - DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " - << OpenIntervals.size() << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " + << OpenIntervals.size() << '\n'); // Add the current instruction to the list of open intervals. OpenIntervals.insert(I); @@ -5388,9 +5404,10 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) { Invariant += GetRegUsage(Inst->getType(), VFs[i]); } - DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); - DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); - DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); + LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant + << '\n'); RU.LoopInvariantRegs = Invariant; RU.MaxLocalUsers = MaxUsages[i]; @@ -5587,8 +5604,9 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) { BlockCost.first += C.first; BlockCost.second |= C.second; - DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first << " for VF " - << VF << " For instruction: " << I << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C.first + << " for VF " << VF << " For instruction: " << I + << '\n'); } // If we are vectorizing a predicated block, it will have been @@ -6247,14 +6265,15 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize, assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); assert(UserVF && "Expected UserVF for outer loop vectorization."); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); - DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); buildVPlans(UserVF, UserVF); return {UserVF, 0}; } - DEBUG(dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the " - "VPlan-native path.\n"); + LLVM_DEBUG( + dbgs() << "LV: Not vectorizing. Inner loops aren't supported in the " + "VPlan-native path.\n"); return NoVectorization; } @@ -6268,13 +6287,13 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { return NoVectorization; if (UserVF) { - DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); + LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. CM.selectUserVectorizationFactor(UserVF); buildVPlans(UserVF, UserVF); - DEBUG(printPlans(dbgs())); + LLVM_DEBUG(printPlans(dbgs())); return {UserVF, 0}; } @@ -6292,7 +6311,7 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { } buildVPlans(1, MaxVF); - DEBUG(printPlans(dbgs())); + LLVM_DEBUG(printPlans(dbgs())); if (MaxVF == 1) return NoVectorization; @@ -6301,7 +6320,8 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { } void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) { - DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF << '\n'); + LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF + << '\n'); BestVF = VF; BestUF = UF; @@ -6777,11 +6797,11 @@ VPBasicBlock *LoopVectorizationPlanner::handleReplication( // Finalize the recipe for Instr, first if it is not predicated. if (!IsPredicated) { - DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); + LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n"); VPBB->appendRecipe(Recipe); return VPBB; } - DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); + LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); assert(VPBB->getSuccessors().empty() && "VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. @@ -6906,8 +6926,9 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range, // should follow. auto SAIt = SinkAfter.find(Instr); if (SAIt != SinkAfter.end()) { - DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" << *SAIt->second - << " to vectorize a 1st order recurrence.\n"); + LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after" + << *SAIt->second + << " to vectorize a 1st order recurrence.\n"); SinkAfterInverse[SAIt->second] = Instr; continue; } @@ -7208,21 +7229,22 @@ bool LoopVectorizePass::processLoop(Loop *L) { const std::string DebugLocStr = getDebugLocString(L); #endif /* NDEBUG */ - DEBUG(dbgs() << "\nLV: Checking a loop in \"" - << L->getHeader()->getParent()->getName() << "\" from " - << DebugLocStr << "\n"); + LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in \"" + << L->getHeader()->getParent()->getName() << "\" from " + << DebugLocStr << "\n"); LoopVectorizeHints Hints(L, DisableUnrolling, *ORE); - DEBUG(dbgs() << "LV: Loop hints:" - << " force=" - << (Hints.getForce() == LoopVectorizeHints::FK_Disabled - ? "disabled" - : (Hints.getForce() == LoopVectorizeHints::FK_Enabled - ? "enabled" - : "?")) - << " width=" << Hints.getWidth() - << " unroll=" << Hints.getInterleave() << "\n"); + LLVM_DEBUG( + dbgs() << "LV: Loop hints:" + << " force=" + << (Hints.getForce() == LoopVectorizeHints::FK_Disabled + ? "disabled" + : (Hints.getForce() == LoopVectorizeHints::FK_Enabled + ? "enabled" + : "?")) + << " width=" << Hints.getWidth() + << " unroll=" << Hints.getInterleave() << "\n"); // Function containing loop Function *F = L->getHeader()->getParent(); @@ -7236,7 +7258,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // benefit from vectorization, respectively. if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { - DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); + LLVM_DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); return false; } @@ -7247,7 +7269,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE, &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { - DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints, ORE); return false; } @@ -7297,13 +7319,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { } if (HasExpectedTC && ExpectedTC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " - << "This loop is worth vectorizing only if no scalar " - << "iteration overheads are incurred."); + LLVM_DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " + << "This loop is worth vectorizing only if no scalar " + << "iteration overheads are incurred."); if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) - DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + LLVM_DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\n"); // Loops with a very small trip count are considered for vectorization // under OptForSize, thereby making sure the cost of their loop body is // dominant, free of runtime guards and scalar iteration overheads. @@ -7316,8 +7338,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // an integer loop and the vector instructions selected are purely integer // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { - DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); + LLVM_DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" + "attribute is used.\n"); ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "NoImplicitFloat", L) << "loop not vectorized due to NoImplicitFloat attribute"); @@ -7331,7 +7353,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // additional fp-math flags can help. if (Hints.isPotentiallyUnsafe() && TTI->isFPVectorizationPotentiallyUnsafe()) { - DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); + LLVM_DEBUG( + dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n"); ORE->emit( createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L) << "loop not vectorized due to unsafe FP support."); @@ -7375,14 +7398,14 @@ bool LoopVectorizePass::processLoop(Loop *L) { std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg; bool VectorizeLoop = true, InterleaveLoop = true; if (Requirements.doesNotMeet(F, L, Hints)) { - DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " - "requirements.\n"); + LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); emitMissedWarning(F, L, Hints, ORE); return false; } if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + LLVM_DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); VecDiagMsg = std::make_pair( "VectorizationNotBeneficial", "the cost-model indicates that vectorization is not beneficial"); @@ -7391,7 +7414,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { if (IC == 1 && UserIC <= 1) { // Tell the user interleaving is not beneficial. - DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); + LLVM_DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); IntDiagMsg = std::make_pair( "InterleavingNotBeneficial", "the cost-model indicates that interleaving is not beneficial"); @@ -7403,8 +7426,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { } } else if (IC > 1 && UserIC == 1) { // Tell the user interleaving is beneficial, but it explicitly disabled. - DEBUG(dbgs() - << "LV: Interleaving is beneficial but is explicitly disabled."); + LLVM_DEBUG( + dbgs() << "LV: Interleaving is beneficial but is explicitly disabled."); IntDiagMsg = std::make_pair( "InterleavingBeneficialButDisabled", "the cost-model indicates that interleaving is beneficial " @@ -7431,24 +7454,24 @@ bool LoopVectorizePass::processLoop(Loop *L) { }); return false; } else if (!VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first, L->getStartLoc(), L->getHeader()) << VecDiagMsg.second; }); } else if (VectorizeLoop && !InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width + << ") in " << DebugLocStr << '\n'); ORE->emit([&]() { return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first, L->getStartLoc(), L->getHeader()) << IntDiagMsg.second; }); } else if (VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width + << ") in " << DebugLocStr << '\n'); + LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); } LVP.setBestPlan(VF.Width, IC); @@ -7495,7 +7518,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Mark the loop as already vectorized to avoid vectorizing again. Hints.setAlreadyVectorized(); - DEBUG(verifyFunction(*L->getHeader()->getParent())); + LLVM_DEBUG(verifyFunction(*L->getHeader()->getParent())); return true; } |