diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 184 |
1 files changed, 67 insertions, 117 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e1e87f08b39..fa2c71dd802 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -277,6 +277,32 @@ static Type *ToVectorTy(Type *Scalar, unsigned VF) { return VectorType::get(Scalar, VF); } +/// A helper function that returns GEP instruction and knows to skip a +/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination +/// pointee types of the 'bitcast' have the same size. +/// For example: +/// bitcast double** %var to i64* - can be skipped +/// bitcast double** %var to i8* - can not +static GetElementPtrInst *getGEPInstruction(Value *Ptr) { + + if (isa<GetElementPtrInst>(Ptr)) + return cast<GetElementPtrInst>(Ptr); + + if (isa<BitCastInst>(Ptr) && + isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0))) { + Type *BitcastTy = Ptr->getType(); + Type *GEPTy = cast<BitCastInst>(Ptr)->getSrcTy(); + if (!isa<PointerType>(BitcastTy) || !isa<PointerType>(GEPTy)) + return nullptr; + Type *Pointee1Ty = cast<PointerType>(BitcastTy)->getPointerElementType(); + Type *Pointee2Ty = cast<PointerType>(GEPTy)->getPointerElementType(); + const DataLayout &DL = cast<BitCastInst>(Ptr)->getModule()->getDataLayout(); + if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty)) + return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0)); + } + return nullptr; +} + // FIXME: The following helper functions have multiple implementations // in the project. They can be effectively organized in a common Load/Store // utilities unit. @@ -2971,12 +2997,40 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { VectorParts VectorGep; // Handle consecutive loads/stores. + GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (ConsecutiveStride) { Ptr = getScalarValue(Ptr, 0, 0); } else { // At this point we should vector version of GEP for Gather or Scatter assert(CreateGatherScatter && "The instruction should be scalarized"); - VectorGep = getVectorValue(Ptr); + if (Gep) { + // Vectorizing GEP, across UF parts. We want to get a vector value for base + // and each index that's defined inside the loop, even if it is + // loop-invariant but wasn't hoisted out. Otherwise we want to keep them + // scalar. + SmallVector<VectorParts, 4> OpsV; + for (Value *Op : Gep->operands()) { + Instruction *SrcInst = dyn_cast<Instruction>(Op); + if (SrcInst && OrigLoop->contains(SrcInst)) + OpsV.push_back(getVectorValue(Op)); + else + OpsV.push_back(VectorParts(UF, Op)); + } + for (unsigned Part = 0; Part < UF; ++Part) { + SmallVector<Value *, 4> Ops; + Value *GEPBasePtr = OpsV[0][Part]; + for (unsigned i = 1; i < Gep->getNumOperands(); i++) + Ops.push_back(OpsV[i][Part]); + Value *NewGep = Builder.CreateGEP(GEPBasePtr, Ops, "VectorGep"); + cast<GetElementPtrInst>(NewGep)->setIsInBounds(Gep->isInBounds()); + assert(NewGep->getType()->isVectorTy() && "Expected vector GEP"); + + NewGep = + Builder.CreateBitCast(NewGep, VectorType::get(Ptr->getType(), VF)); + VectorGep.push_back(NewGep); + } + } else + VectorGep = getVectorValue(Ptr); } VectorParts Mask = createBlockInMask(Instr->getParent()); @@ -4736,72 +4790,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB) { widenPHIInstruction(&I, UF, VF); continue; } // End of PHI. - case Instruction::GetElementPtr: { - // Construct a vector GEP by widening the operands of the scalar GEP as - // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP - // results in a vector of pointers when at least one operand of the GEP - // is vector-typed. Thus, to keep the representation compact, we only use - // vector-typed operands for loop-varying values. - auto *GEP = cast<GetElementPtrInst>(&I); - VectorParts Entry(UF); - - if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { - // If we are vectorizing, but the GEP has only loop-invariant operands, - // the GEP we build (by only using vector-typed operands for - // loop-varying values) would be a scalar pointer. Thus, to ensure we - // produce a vector of pointers, we need to either arbitrarily pick an - // operand to broadcast, or broadcast a clone of the original GEP. - // Here, we broadcast a clone of the original. - // - // TODO: If at some point we decide to scalarize instructions having - // loop-invariant operands, this special case will no longer be - // required. We would add the scalarization decision to - // collectLoopScalars() and teach getVectorValue() to broadcast - // the lane-zero scalar value. - auto *Clone = Builder.Insert(GEP->clone()); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = Builder.CreateVectorSplat(VF, Clone); - } else { - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers. But if we are only unrolling, we want - // to produce a scalar GEP for each unroll part. Thus, the GEP we - // produce with the code below will be scalar (if VF == 1) or vector - // (otherwise). Note that for the unroll-only case, we still maintain - // values in the vector mapping with initVector, as we do for other - // instructions. - for (unsigned Part = 0; Part < UF; ++Part) { - - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) - ? GEP->getPointerOperand() - : getVectorValue(GEP->getPointerOperand())[Part]; - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. - SmallVector<Value *, 4> Indices; - for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) { - if (OrigLoop->isLoopInvariant(U.get())) - Indices.push_back(U.get()); - else - Indices.push_back(getVectorValue(U.get())[Part]); - } - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = GEP->isInBounds() - ? Builder.CreateInBoundsGEP(Ptr, Indices) - : Builder.CreateGEP(Ptr, Indices); - assert((VF == 1 || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - Entry[Part] = NewGEP; - } - } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, GEP); - break; - } case Instruction::UDiv: case Instruction::SDiv: case Instruction::SRem: @@ -5492,58 +5481,21 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // If an instruction is uniform after vectorization, it will remain scalar. Scalars[VF].insert(Uniforms[VF].begin(), Uniforms[VF].end()); - // These sets are used to seed the analysis of loop scalars with memory - // access pointer operands that will remain scalar. - SmallSetVector<Instruction *, 8> ScalarPtrs; - SmallPtrSet<Instruction *, 8> PossibleNonScalarPtrs; - - // Returns true if the given instruction will not be a gather or scatter - // operation with vectorization factor VF. - auto isScalarDecision = [&](Instruction *I, unsigned VF) { - InstWidening WideningDecision = getWideningDecision(I, VF); - assert(WideningDecision != CM_Unknown && - "Widening decision should be ready at this moment"); - return WideningDecision != CM_GatherScatter; - }; - - // Collect the initial values that we know will not be vectorized. A value - // will remain scalar if it is only used as the pointer operand of memory - // accesses that are not gather or scatter operations. - for (auto *BB : TheLoop->blocks()) { + // Collect the getelementptr instructions that will not be vectorized. A + // getelementptr instruction is only vectorized if it is used for a legal + // gather or scatter operation. + for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { - - // If there's no pointer operand or the pointer operand is not an - // instruction, there's nothing to do. - auto *Ptr = dyn_cast_or_null<Instruction>(getPointerOperand(&I)); - if (!Ptr) + if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { + Scalars[VF].insert(GEP); continue; - - // If the pointer has already been identified as scalar (e.g., if it was - // also inditifed as uniform), there's nothing to do. - if (Scalars[VF].count(Ptr)) + } + auto *Ptr = getPointerOperand(&I); + if (!Ptr) continue; - - // True if all users of Ptr are memory accesses that have Ptr as their - // pointer operand. - auto UsersAreMemAccesses = all_of(Ptr->users(), [&](User *U) -> bool { - return getPointerOperand(U) == Ptr; - }); - - // If the pointer is used by an instruction other than a memory access, - // it may not remain scalar. If the memory access is a gather or scatter - // operation, the pointer will not remain scalar. - if (!UsersAreMemAccesses || !isScalarDecision(&I, VF)) - PossibleNonScalarPtrs.insert(Ptr); - else - ScalarPtrs.insert(Ptr); - } - } - - // Add to the set of scalars all the pointers we know will not be vectorized. - for (auto *I : ScalarPtrs) - if (!PossibleNonScalarPtrs.count(I)) { - DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n"); - Scalars[VF].insert(I); + auto *GEP = getGEPInstruction(Ptr); + if (GEP && getWideningDecision(&I, VF) == CM_GatherScatter) + Scalars[VF].erase(GEP); } // An induction variable will remain scalar if all users of the induction @@ -5574,8 +5526,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // The induction variable and its update instruction will remain scalar. Scalars[VF].insert(Ind); Scalars[VF].insert(IndUpdate); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n"); - DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } } |