summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/VPlan.cpp
diff options
context:
space:
mode:
authorAyal Zaks <ayal.zaks@intel.com>2018-10-18 15:03:15 +0000
committerAyal Zaks <ayal.zaks@intel.com>2018-10-18 15:03:15 +0000
commitb0b5312e677ccbe568ffe4ea8247c4384d30b000 (patch)
tree8842218ea6576623d45b67fcf7125a660841b436 /llvm/lib/Transforms/Vectorize/VPlan.cpp
parenta1e6e65b9fd68490db04530a45c9333cf69b6213 (diff)
downloadbcm5719-llvm-b0b5312e677ccbe568ffe4ea8247c4384d30b000.tar.gz
bcm5719-llvm-b0b5312e677ccbe568ffe4ea8247c4384d30b000.zip
[LV] Fold tail by masking to vectorize loops of arbitrary trip count under opt for size
When optimizing for size, a loop is vectorized only if the resulting vector loop completely replaces the original scalar loop. This holds if no runtime guards are needed, if the original trip-count TC does not overflow, and if TC is a known constant that is a multiple of the VF. The last two TC-related conditions can be overcome by 1. rounding the trip-count of the vector loop up from TC to a multiple of VF; 2. masking the vector body under a newly introduced "if (i <= TC-1)" condition. The patch allows loops with arbitrary trip counts to be vectorized under -Os, subject to the existing cost model considerations. It also applies to loops with small trip counts (under -O2) which are currently handled as if under -Os. The patch does not handle loops with reductions, live-outs, or w/o a primary induction variable, and disallows interleave groups. (Third, final and main part of -) Differential Revision: https://reviews.llvm.org/D50480 llvm-svn: 344743
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlan.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp24
1 files changed, 23 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 39cb4e9ec68..a3c15a36b05 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -303,6 +303,13 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, V, Part);
break;
}
+ case VPInstruction::ICmpULE: {
+ Value *IV = State.get(getOperand(0), Part);
+ Value *TC = State.get(getOperand(1), Part);
+ Value *V = Builder.CreateICmpULE(IV, TC);
+ State.set(this, V, Part);
+ break;
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -328,6 +335,9 @@ void VPInstruction::print(raw_ostream &O) const {
case VPInstruction::Not:
O << "not";
break;
+ case VPInstruction::ICmpULE:
+ O << "icmp ule";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -342,6 +352,15 @@ void VPInstruction::print(raw_ostream &O) const {
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
+ // -1. Check if the backedge taken count is needed, and if so build it.
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ Value *TC = State->TripCount;
+ IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
+ auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
+ "trip.count.minus.1");
+ Value2VPValue[TCMO] = BackedgeTakenCount;
+ }
+
// 0. Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;
@@ -469,8 +488,11 @@ void VPlanPrinter::dump() {
OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
if (!Plan.getName().empty())
OS << "\\n" << DOT::EscapeString(Plan.getName());
- if (!Plan.Value2VPValue.empty()) {
+ if (!Plan.Value2VPValue.empty() || Plan.BackedgeTakenCount) {
OS << ", where:";
+ if (Plan.BackedgeTakenCount)
+ OS << "\\n"
+ << *Plan.getOrCreateBackedgeTakenCount() << " := BackedgeTakenCount";
for (auto Entry : Plan.Value2VPValue) {
OS << "\\n" << *Entry.second;
OS << DOT::EscapeString(" := ");
OpenPOWER on IntegriCloud