summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r-- llvm/lib/Transforms/Vectorize/BBVectorize.cpp 24
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 116
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 94
3 files changed, 108 insertions, 126 deletions
diff --git a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index c16e4e089d7..29fb01f1b2e 100644
--- a/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -207,7 +207,6 @@ namespace {
AA = &P->getAnalysis<AliasAnalysis>();
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
- DL = &F.getParent()->getDataLayout();
TTI = IgnoreTargetInfo
? nullptr
: &P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -222,7 +221,6 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
ScalarEvolution *SE;
- const DataLayout *DL;
const TargetTransformInfo *TTI;
// FIXME: const correct?
@@ -442,7 +440,6 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DL = &BB.getModule()->getDataLayout();
TTI = IgnoreTargetInfo
? nullptr
: &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
@@ -641,13 +638,13 @@ namespace {
dyn_cast<SCEVConstant>(OffsetSCEV)) {
ConstantInt *IntOff = ConstOffSCEV->getValue();
int64_t Offset = IntOff->getSExtValue();
-
+ const DataLayout &DL = I->getModule()->getDataLayout();
Type *VTy = IPtr->getType()->getPointerElementType();
- int64_t VTyTSS = (int64_t) DL->getTypeStoreSize(VTy);
+ int64_t VTyTSS = (int64_t)DL.getTypeStoreSize(VTy);
Type *VTy2 = JPtr->getType()->getPointerElementType();
if (VTy != VTy2 && Offset < 0) {
- int64_t VTy2TSS = (int64_t) DL->getTypeStoreSize(VTy2);
+ int64_t VTy2TSS = (int64_t)DL.getTypeStoreSize(VTy2);
OffsetInElmts = Offset/VTy2TSS;
return (std::abs(Offset) % VTy2TSS) == 0;
}
@@ -845,7 +842,7 @@ namespace {
// It is important to cleanup here so that future iterations of this
// function have less work to do.
- (void) SimplifyInstructionsInBlock(&BB, DL, AA->getTargetLibraryInfo());
+ (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo());
return true;
}
@@ -899,10 +896,6 @@ namespace {
return false;
}
- // We can't vectorize memory operations without target data
- if (!DL && IsSimpleLoadStore)
- return false;
-
Type *T1, *T2;
getInstructionTypes(I, T1, T2);
@@ -937,9 +930,8 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || !DL) &&
- (T1->getScalarType()->isPointerTy() ||
- T2->getScalarType()->isPointerTy()))
+ if (!Config.VectorizePointers && (T1->getScalarType()->isPointerTy() ||
+ T2->getScalarType()->isPointerTy()))
return false;
if (!TTI && (T1->getPrimitiveSizeInBits() >= Config.VectorBits ||
@@ -1000,8 +992,8 @@ namespace {
// An aligned load or store is possible only if the instruction
// with the lower offset has an alignment suitable for the
// vector type.
-
- unsigned VecAlignment = DL->getPrefTypeAlignment(VType);
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned VecAlignment = DL.getPrefTypeAlignment(VType);
if (BottomAlignment < VecAlignment)
return false;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d22b469046f..18a456f611a 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -244,13 +244,12 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) {
class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned VecWidth,
- unsigned UnrollFactor)
- : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
- VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
- Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
- Legal(nullptr), AddedSafetyChecks(false) {}
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ unsigned VecWidth, unsigned UnrollFactor)
+ : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), VF(VecWidth),
+ UF(UnrollFactor), Builder(SE->getContext()), Induction(nullptr),
+ OldInduction(nullptr), WidenMap(UnrollFactor), Legal(nullptr),
+ AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *L) {
@@ -403,8 +402,6 @@ protected:
DominatorTree *DT;
/// Alias Analysis.
AliasAnalysis *AA;
- /// Data Layout.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
@@ -456,9 +453,9 @@ protected:
class InnerLoopUnroller : public InnerLoopVectorizer {
public:
InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI,
- DominatorTree *DT, const DataLayout *DL,
- const TargetLibraryInfo *TLI, unsigned UnrollFactor) :
- InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
+ DominatorTree *DT, const TargetLibraryInfo *TLI,
+ unsigned UnrollFactor)
+ : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, 1, UnrollFactor) {}
private:
void scalarizeInstruction(Instruction *Instr,
@@ -560,14 +557,13 @@ static void propagateMetadata(SmallVectorImpl<Value *> &To, const Instruction *F
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- AliasAnalysis *AA, Function *F,
- const TargetTransformInfo *TTI,
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ Function *F, const TargetTransformInfo *TTI,
LoopAccessAnalysis *LAA)
- : NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr),
- Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
+ : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -859,8 +855,6 @@ private:
Loop *TheLoop;
/// Scev analysis.
ScalarEvolution *SE;
- /// DataLayout analysis.
- const DataLayout *DL;
/// Target Library Info.
TargetLibraryInfo *TLI;
/// Parent function
@@ -919,10 +913,9 @@ public:
LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
- AssumptionCache *AC, const Function *F,
- const LoopVectorizeHints *Hints)
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const Function *F, const LoopVectorizeHints *Hints)
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
TheFunction(F), Hints(Hints) {
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
}
@@ -1000,8 +993,6 @@ private:
LoopVectorizationLegality *Legal;
/// Vector target information.
const TargetTransformInfo &TTI;
- /// Target data layout information.
- const DataLayout *DL;
/// Target Library Info.
const TargetLibraryInfo *TLI;
const Function *TheFunction;
@@ -1266,7 +1257,6 @@ struct LoopVectorize : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
LoopInfo *LI;
TargetTransformInfo *TTI;
DominatorTree *DT;
@@ -1282,7 +1272,6 @@ struct LoopVectorize : public FunctionPass {
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
- DL = &F.getParent()->getDataLayout();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1303,12 +1292,6 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (!DL) {
- DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
- << ": Missing data layout\n");
- return false;
- }
-
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
// and can invalidate iterators across the loops.
@@ -1436,7 +1419,7 @@ struct LoopVectorize : public FunctionPass {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI, LAA);
+ LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1444,8 +1427,7 @@ struct LoopVectorize : public FunctionPass {
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AC, F,
- &Hints);
+ LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints);
// Check the function attributes to find out if this function should be
// optimized for size.
@@ -1509,11 +1491,11 @@ struct LoopVectorize : public FunctionPass {
// We decided not to vectorize, but we may want to unroll.
- InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
+ InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, UF);
Unroller.vectorize(&LVL);
} else {
// If we decided that it is *legal* to vectorize the loop then do it.
- InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
+ InnerLoopVectorizer LB(L, SE, LI, DT, TLI, VF.Width, UF);
LB.vectorize(&LVL);
++LoopsVectorized;
@@ -1612,10 +1594,10 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
/// \brief Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
-static unsigned getGEPInductionOperand(const DataLayout *DL,
- const GetElementPtrInst *Gep) {
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
- unsigned GEPAllocSize = DL->getTypeAllocSize(
+ unsigned GEPAllocSize = DL.getTypeAllocSize(
cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
// Walk backwards and try to peel off zeros.
@@ -1626,7 +1608,7 @@ static unsigned getGEPInductionOperand(const DataLayout *DL,
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
break;
--LastOperand;
}
@@ -1672,7 +1654,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return II.getConsecutiveDirection();
}
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -1765,11 +1747,12 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
+ const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarDataTy);
+ Alignment = DL.getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(DataTy) / VF;
if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
!Legal->isMaskRequired(SI))
@@ -1810,7 +1793,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// The last index does not have to be the induction. It can be
// consecutive and be a function of the index. For example A[I+1];
unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
+ unsigned InductionOperand = getGEPInductionOperand(Gep);
// Create the new GEP with the new induction variable.
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
@@ -2131,9 +2114,11 @@ void InnerLoopVectorizer::createEmptyLoop() {
ExitCount = SE->getAddExpr(BackedgeTakeCount,
SE->getConstant(BackedgeTakeCount->getType(), 1));
+ const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout();
+
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*SE, "induction");
+ SCEVExpander Exp(*SE, DL, "induction");
// We need to test whether the backedge-taken count is uint##_max. Adding one
// to it will cause overflow and an incorrect loop trip count in the vector
@@ -3515,6 +3500,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Look for the attribute signaling the absence of NaNs.
Function &F = *Header->getParent();
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (F.hasFnAttribute("no-nans-fp-math"))
HasFunNoNaNAttr =
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
@@ -3570,9 +3556,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (IK_NoInduction != IK) {
// Get the widest type.
if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
else
- WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction && StepValue->isOne()) {
@@ -3717,13 +3703,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
///\brief Remove GEPs whose indices but the last one are loop invariant and
/// return the induction operand of the gep pointer.
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
- unsigned InductionOperand = getGEPInductionOperand(DL, GEP);
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
// Check that all of the gep indices are uniform except for our induction
// operand.
@@ -3752,8 +3737,7 @@ static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
///\brief Get the stride of a pointer access in a loop.
/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a
/// pointer to the Value, or null otherwise.
-static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
- const DataLayout *DL, Loop *Lp) {
+static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
@@ -3766,7 +3750,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// The size of the pointer access.
int64_t PtrAccessSize = 1;
- Ptr = stripGetElementPtr(Ptr, SE, DL, Lp);
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
@@ -3785,7 +3769,8 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
if (OrigPtr == Ptr) {
- DL->getTypeAllocSize(PtrTy->getElementType());
+ const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
+ DL.getTypeAllocSize(PtrTy->getElementType());
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
@@ -3837,7 +3822,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) {
else
return;
- Value *Stride = getStrideFromPointer(Ptr, SE, DL, TheLoop);
+ Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop);
if (!Stride)
return;
@@ -4215,7 +4200,8 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
if (!PointerElementType->isSized())
return IK_NoInduction;
- int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType));
+ const DataLayout &DL = Phi->getModule()->getDataLayout();
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return IK_NoInduction;
@@ -4427,6 +4413,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
unsigned LoopVectorizationCostModel::getWidestType() {
unsigned MaxWidth = 8;
+ const DataLayout &DL = TheFunction->getParent()->getDataLayout();
// For each block.
for (Loop::block_iterator bb = TheLoop->block_begin(),
@@ -4461,7 +4448,7 @@ unsigned LoopVectorizationCostModel::getWidestType() {
continue;
MaxWidth = std::max(MaxWidth,
- (unsigned)DL->getTypeSizeInBits(T->getScalarType()));
+ (unsigned)DL.getTypeSizeInBits(T->getScalarType()));
}
}
@@ -4958,8 +4945,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// Scalarized loads/stores.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
- unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
- unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+ const DataLayout &DL = I->getModule()->getDataLayout();
+ unsigned ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
+ unsigned VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
bool IsComplexComputation =
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4302070fb7c..f1be1a58bbd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -342,11 +342,11 @@ public:
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
- BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
- TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
- LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC)
+ BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
+ TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
+ DominatorTree *Dt, AssumptionCache *AC)
: NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func),
- SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
}
@@ -383,7 +383,7 @@ public:
}
/// \returns true if the memory operations A and B are consecutive.
- bool isConsecutiveAccess(Value *A, Value *B);
+ bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
@@ -877,7 +877,6 @@ private:
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -1130,8 +1129,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
- if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0])) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
+ if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
++NumLoadsWantToChangeOrder;
}
BS.cancelScheduling(VL);
@@ -1300,9 +1300,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
return;
}
case Instruction::Store: {
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Check if the stores are consecutive or of we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
- if (!isConsecutiveAccess(VL[i], VL[i + 1])) {
+ if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
BS.cancelScheduling(VL);
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
@@ -1789,7 +1790,7 @@ unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
return -1;
}
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
Value *PtrA = getPointerOperand(A);
Value *PtrB = getPointerOperand(B);
unsigned ASA = getAddressSpaceOperand(A);
@@ -1803,13 +1804,13 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
return false;
- unsigned PtrBitWidth = DL->getPointerSizeInBits(ASA);
+ unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
- APInt Size(PtrBitWidth, DL->getTypeStoreSize(Ty));
+ APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetA);
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(*DL, OffsetB);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
APInt OffsetDelta = OffsetB - OffsetA;
@@ -1842,6 +1843,7 @@ bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right) {
+ const DataLayout &DL = F->getParent()->getDataLayout();
// Push left and right operands of binary operation into Left and Right
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -1856,10 +1858,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
Instruction *VL1 = cast<Instruction>(VL[j]);
Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) {
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
std::swap(Left[j], Right[j]);
continue;
- } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) {
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -1870,10 +1872,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
Instruction *VL1 = cast<Instruction>(VL[j]);
Instruction *VL2 = cast<Instruction>(VL[j + 1]);
- if (isConsecutiveAccess(L, L1) && VL1->isCommutative()) {
+ if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
std::swap(Left[j], Right[j]);
continue;
- } else if (isConsecutiveAccess(L, L1) && VL2->isCommutative()) {
+ } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -1983,6 +1985,8 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
Right = OrigRight;
}
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
// Finally check if we can get longer vectorizable chain by reordering
// without breaking the good operand order detected above.
// E.g. If we have something like-
@@ -2001,7 +2005,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
for (unsigned j = 0; j < VL.size() - 1; ++j) {
if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
- if (isConsecutiveAccess(L, L1)) {
+ if (isConsecutiveAccess(L, L1, DL)) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -2009,7 +2013,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
}
if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
- if (isConsecutiveAccess(L, L1)) {
+ if (isConsecutiveAccess(L, L1, DL)) {
std::swap(Left[j + 1], Right[j + 1]);
continue;
}
@@ -2105,6 +2109,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return Gather(E->Scalars, VecTy);
}
+ const DataLayout &DL = F->getParent()->getDataLayout();
unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
@@ -2301,8 +2306,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(ScalarLoadTy);
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(ScalarLoadTy);
+ }
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
++NumVectorInstructions;
@@ -2331,8 +2337,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(
ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0));
- if (!Alignment)
- Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
+ if (!Alignment) {
+ Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
+ }
S->setAlignment(Alignment);
E->VectorizedValue = S;
++NumVectorInstructions;
@@ -3051,7 +3058,6 @@ struct SLPVectorizer : public FunctionPass {
}
ScalarEvolution *SE;
- const DataLayout *DL;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AliasAnalysis *AA;
@@ -3064,7 +3070,6 @@ struct SLPVectorizer : public FunctionPass {
return false;
SE = &getAnalysis<ScalarEvolution>();
- DL = &F.getParent()->getDataLayout();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
TLI = TLIP ? &TLIP->getTLI() : nullptr;
@@ -3081,11 +3086,6 @@ struct SLPVectorizer : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- // Must have DataLayout. We can't require it because some tests run w/o
- // triple.
- if (!DL)
- return false;
-
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
@@ -3094,7 +3094,7 @@ struct SLPVectorizer : public FunctionPass {
// Use the bottom up slp vectorizer to construct chains that start with
// store instructions.
- BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC);
+ BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
@@ -3190,7 +3190,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
<< "\n");
Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
- unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+ auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout();
+ unsigned Sz = DL.getTypeSizeInBits(StoreTy);
unsigned VF = MinVecRegSize / Sz;
if (!isPowerOf2_32(Sz) || VF < 2)
@@ -3233,8 +3234,8 @@ bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
int costThreshold, BoUpSLP &R) {
- SetVector<Value *> Heads, Tails;
- SmallDenseMap<Value *, Value *> ConsecutiveChain;
+ SetVector<StoreInst *> Heads, Tails;
+ SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
@@ -3247,8 +3248,8 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
for (unsigned j = 0; j < e; ++j) {
if (i == j)
continue;
-
- if (R.isConsecutiveAccess(Stores[i], Stores[j])) {
+ const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+ if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
Tails.insert(Stores[j]);
Heads.insert(Stores[i]);
ConsecutiveChain[Stores[i]] = Stores[j];
@@ -3257,7 +3258,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
}
// For stores that start but don't end a link in the chain:
- for (SetVector<Value *>::iterator it = Heads.begin(), e = Heads.end();
+ for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
it != e; ++it) {
if (Tails.count(*it))
continue;
@@ -3265,7 +3266,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
// We found a store instr that starts a chain. Now follow the chain and try
// to vectorize it.
BoUpSLP::ValueList Operands;
- Value *I = *it;
+ StoreInst *I = *it;
// Collect the chain into a list.
while (Tails.count(I) || Heads.count(I)) {
if (VectorizedStores.count(I))
@@ -3290,6 +3291,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
unsigned count = 0;
StoreRefs.clear();
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
StoreInst *SI = dyn_cast<StoreInst>(it);
if (!SI)
@@ -3335,9 +3337,10 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
return false;
unsigned Opcode0 = I0->getOpcode();
+ const DataLayout &DL = I0->getModule()->getDataLayout();
Type *Ty0 = I0->getType();
- unsigned Sz = DL->getTypeSizeInBits(Ty0);
+ unsigned Sz = DL.getTypeSizeInBits(Ty0);
unsigned VF = MinVecRegSize / Sz;
for (int i = 0, e = VL.size(); i < e; ++i) {
@@ -3539,8 +3542,7 @@ public:
ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
/// \brief Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
- const DataLayout *DL) {
+ bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
assert((!Phi ||
std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
"Thi phi needs to use the binary operator");
@@ -3565,9 +3567,10 @@ public:
if (!isValidElementType(Ty))
return false;
+ const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
- ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
+ ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;
ReductionPHI = Phi;
@@ -3877,8 +3880,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Try to match and vectorize a horizontal reduction.
HorizontalReduction HorRdx;
- if (ShouldVectorizeHor &&
- HorRdx.matchAssociativeReduction(P, BI, DL) &&
+ if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
HorRdx.tryToReduce(R, TTI)) {
Changed = true;
it = BB->begin();
@@ -3908,7 +3910,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
+ if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
HorRdx.tryToReduce(R, TTI)) ||
tryToVectorize(BinOp, R))) {
Changed = true;
OpenPOWER on IntegriCloud