Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 184
1 file changed, 67 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e1e87f08b39..fa2c71dd802 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -277,6 +277,32 @@ static Type *ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
+/// A helper function that returns the GEP instruction and knows to skip a
+/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination
+/// pointee types of the 'bitcast' have the same size.
+/// For example:
+///   bitcast double** %var to i64* - can be skipped
+///   bitcast double** %var to i8*  - cannot
+static GetElementPtrInst *getGEPInstruction(Value *Ptr) {
+
+ if (isa<GetElementPtrInst>(Ptr))
+ return cast<GetElementPtrInst>(Ptr);
+
+ if (isa<BitCastInst>(Ptr) &&
+ isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0))) {
+ Type *BitcastTy = Ptr->getType();
+ Type *GEPTy = cast<BitCastInst>(Ptr)->getSrcTy();
+ if (!isa<PointerType>(BitcastTy) || !isa<PointerType>(GEPTy))
+ return nullptr;
+ Type *Pointee1Ty = cast<PointerType>(BitcastTy)->getPointerElementType();
+ Type *Pointee2Ty = cast<PointerType>(GEPTy)->getPointerElementType();
+ const DataLayout &DL = cast<BitCastInst>(Ptr)->getModule()->getDataLayout();
+ if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty))
+ return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0));
+ }
+ return nullptr;
+}
+
// FIXME: The following helper functions have multiple implementations
// in the project. They can be effectively organized in a common Load/Store
// utilities unit.
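Aside: the bitcast test in the new helper reduces to a pointee-size comparison against the module's DataLayout. A minimal sketch of that check (hypothetical helper name, assuming the typed-pointer API this patch uses):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

// Sketch: a pointer bitcast may be looked through only when the source and
// destination pointee types occupy the same number of bits in the target's
// DataLayout (e.g. double* vs. i64 on a 64-bit target, per the example above).
static bool pointeeSizesMatch(const DataLayout &DL, PointerType *From,
                              PointerType *To) {
  return DL.getTypeSizeInBits(From->getPointerElementType()) ==
         DL.getTypeSizeInBits(To->getPointerElementType());
}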
@@ -2971,12 +2997,40 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
VectorParts VectorGep;
// Handle consecutive loads/stores.
+ GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (ConsecutiveStride) {
Ptr = getScalarValue(Ptr, 0, 0);
} else {
// At this point we should have the vector version of the GEP for Gather or Scatter
assert(CreateGatherScatter && "The instruction should be scalarized");
- VectorGep = getVectorValue(Ptr);
+ if (Gep) {
+ // Vectorize the GEP across the UF unroll parts. We want a vector value for
+ // the base and for each index that is defined inside the loop, even if it
+ // is loop-invariant but was not hoisted out. Otherwise we keep them scalar.
+ SmallVector<VectorParts, 4> OpsV;
+ for (Value *Op : Gep->operands()) {
+ Instruction *SrcInst = dyn_cast<Instruction>(Op);
+ if (SrcInst && OrigLoop->contains(SrcInst))
+ OpsV.push_back(getVectorValue(Op));
+ else
+ OpsV.push_back(VectorParts(UF, Op));
+ }
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Ops;
+ Value *GEPBasePtr = OpsV[0][Part];
+ for (unsigned i = 1; i < Gep->getNumOperands(); i++)
+ Ops.push_back(OpsV[i][Part]);
+ Value *NewGep = Builder.CreateGEP(GEPBasePtr, Ops, "VectorGep");
+ cast<GetElementPtrInst>(NewGep)->setIsInBounds(Gep->isInBounds());
+ assert(NewGep->getType()->isVectorTy() && "Expected vector GEP");
+
+ NewGep =
+ Builder.CreateBitCast(NewGep, VectorType::get(Ptr->getType(), VF));
+ VectorGep.push_back(NewGep);
+ }
+ } else
+ VectorGep = getVectorValue(Ptr);
}
VectorParts Mask = createBlockInMask(Instr->getParent());
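For context on what the new gather/scatter path builds: a GEP with at least one vector-typed operand yields a vector of pointers. A minimal sketch, with hypothetical function and operand shapes not taken from the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include <cassert>

using namespace llvm;

// Sketch: ScalarBase is a loop-invariant double* kept scalar; VectorIdx is a
// widened <4 x i64>. CreateGEP then returns a <4 x double*> value, the
// address form consumed by a masked gather or scatter. We assume the operands
// are not all constants, so the result really is a GEP instruction.
static Value *buildVectorGEP(IRBuilder<> &Builder, Value *ScalarBase,
                             Value *VectorIdx, bool InBounds) {
  Value *VGep = Builder.CreateGEP(ScalarBase, VectorIdx, "VectorGep");
  cast<GetElementPtrInst>(VGep)->setIsInBounds(InBounds);
  assert(VGep->getType()->isVectorTy() && "Expected vector GEP");
  return VGep;
}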
@@ -4736,72 +4790,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB) {
widenPHIInstruction(&I, UF, VF);
continue;
} // End of PHI.
- case Instruction::GetElementPtr: {
- // Construct a vector GEP by widening the operands of the scalar GEP as
- // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
- // results in a vector of pointers when at least one operand of the GEP
- // is vector-typed. Thus, to keep the representation compact, we only use
- // vector-typed operands for loop-varying values.
- auto *GEP = cast<GetElementPtrInst>(&I);
- VectorParts Entry(UF);
-
- if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
- // the GEP we build (by only using vector-typed operands for
- // loop-varying values) would be a scalar pointer. Thus, to ensure we
- // produce a vector of pointers, we need to either arbitrarily pick an
- // operand to broadcast, or broadcast a clone of the original GEP.
- // Here, we broadcast a clone of the original.
- //
- // TODO: If at some point we decide to scalarize instructions having
- // loop-invariant operands, this special case will no longer be
- // required. We would add the scalarization decision to
- // collectLoopScalars() and teach getVectorValue() to broadcast
- // the lane-zero scalar value.
- auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = Builder.CreateVectorSplat(VF, Clone);
- } else {
- // If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < UF; ++Part) {
-
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand())
- ? GEP->getPointerOperand()
- : getVectorValue(GEP->getPointerOperand())[Part];
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) {
- if (OrigLoop->isLoopInvariant(U.get()))
- Indices.push_back(U.get());
- else
- Indices.push_back(getVectorValue(U.get())[Part]);
- }
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP = GEP->isInBounds()
- ? Builder.CreateInBoundsGEP(Ptr, Indices)
- : Builder.CreateGEP(Ptr, Indices);
- assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- Entry[Part] = NewGEP;
- }
- }
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, GEP);
- break;
- }
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::SRem:
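The deleted case above handled one subtlety worth restating: a GEP whose operands are all loop-invariant stays scalar-typed, so the old code broadcast a clone to obtain a vector of pointers. A condensed sketch of that special case (hypothetical helper, not the patch's code):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Sketch: with only loop-invariant operands the cloned GEP is still a scalar
// pointer; CreateVectorSplat broadcasts it into e.g. <4 x double*>.
static Value *splatInvariantGEP(IRBuilder<> &Builder, GetElementPtrInst *GEP,
                                unsigned VF) {
  auto *Clone = Builder.Insert(GEP->clone());
  return Builder.CreateVectorSplat(VF, Clone);
}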
@@ -5492,58 +5481,21 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// If an instruction is uniform after vectorization, it will remain scalar.
Scalars[VF].insert(Uniforms[VF].begin(), Uniforms[VF].end());
- // These sets are used to seed the analysis of loop scalars with memory
- // access pointer operands that will remain scalar.
- SmallSetVector<Instruction *, 8> ScalarPtrs;
- SmallPtrSet<Instruction *, 8> PossibleNonScalarPtrs;
-
- // Returns true if the given instruction will not be a gather or scatter
- // operation with vectorization factor VF.
- auto isScalarDecision = [&](Instruction *I, unsigned VF) {
- InstWidening WideningDecision = getWideningDecision(I, VF);
- assert(WideningDecision != CM_Unknown &&
- "Widening decision should be ready at this moment");
- return WideningDecision != CM_GatherScatter;
- };
-
- // Collect the initial values that we know will not be vectorized. A value
- // will remain scalar if it is only used as the pointer operand of memory
- // accesses that are not gather or scatter operations.
- for (auto *BB : TheLoop->blocks()) {
+ // Collect the getelementptr instructions that will not be vectorized. A
+ // getelementptr instruction is only vectorized if it is used for a legal
+ // gather or scatter operation.
+ for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
-
- // If there's no pointer operand or the pointer operand is not an
- // instruction, there's nothing to do.
- auto *Ptr = dyn_cast_or_null<Instruction>(getPointerOperand(&I));
- if (!Ptr)
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ Scalars[VF].insert(GEP);
continue;
-
- // If the pointer has already been identified as scalar (e.g., if it was
- // also identified as uniform), there's nothing to do.
- if (Scalars[VF].count(Ptr))
+ }
+ auto *Ptr = getPointerOperand(&I);
+ if (!Ptr)
continue;
-
- // True if all users of Ptr are memory accesses that have Ptr as their
- // pointer operand.
- auto UsersAreMemAccesses = all_of(Ptr->users(), [&](User *U) -> bool {
- return getPointerOperand(U) == Ptr;
- });
-
- // If the pointer is used by an instruction other than a memory access,
- // it may not remain scalar. If the memory access is a gather or scatter
- // operation, the pointer will not remain scalar.
- if (!UsersAreMemAccesses || !isScalarDecision(&I, VF))
- PossibleNonScalarPtrs.insert(Ptr);
- else
- ScalarPtrs.insert(Ptr);
- }
- }
-
- // Add to the set of scalars all the pointers we know will not be vectorized.
- for (auto *I : ScalarPtrs)
- if (!PossibleNonScalarPtrs.count(I)) {
- DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n");
- Scalars[VF].insert(I);
+ auto *GEP = getGEPInstruction(Ptr);
+ if (GEP && getWideningDecision(&I, VF) == CM_GatherScatter)
+ Scalars[VF].erase(GEP);
}
// An induction variable will remain scalar if all users of the induction
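The reworked collectLoopScalars logic above is a two-pass rule: provisionally mark every GEP scalar, then un-mark the ones that address a gather or scatter. A condensed sketch under stated assumptions (hypothetical Decision enum and query callback; the bitcast-skipping helper is elided for brevity):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"
#include <functional>
#include <set>

using namespace llvm;

enum class Decision { Scalar, Widen, GatherScatter };

// Sketch: every GEP starts out scalar; a GEP is un-scalarized only when the
// memory access it feeds was chosen to become a gather or scatter.
static void collectScalarGEPs(
    Loop &L, std::set<Instruction *> &Scalars,
    const std::function<Decision(Instruction *)> &DecisionFor) {
  for (BasicBlock *BB : L.blocks())
    for (Instruction &I : *BB) {
      if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
        Scalars.insert(GEP); // provisionally scalar
        continue;
      }
      Value *Ptr = nullptr;
      if (auto *LI = dyn_cast<LoadInst>(&I))
        Ptr = LI->getPointerOperand();
      else if (auto *SI = dyn_cast<StoreInst>(&I))
        Ptr = SI->getPointerOperand();
      if (!Ptr)
        continue;
      if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
        if (DecisionFor(&I) == Decision::GatherScatter)
          Scalars.erase(GEP); // needs a vector GEP after all
    }
}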
@@ -5574,8 +5526,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// The induction variable and its update instruction will remain scalar.
Scalars[VF].insert(Ind);
Scalars[VF].insert(IndUpdate);
- DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n");
- DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n");
}
}