Diffstat (limited to 'llvm/lib')
 llvm/lib/Analysis/TargetTransformInfo.cpp               | 10
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp  |  8
 llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h    |  2
 llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp          |  8
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h            |  2
 llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp  | 31
 llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h    |  5
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp      | 11
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h        |  2
 llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp  |  4
 llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h    |  2
 llvm/lib/Target/X86/X86TargetTransformInfo.cpp          | 21
 llvm/lib/Target/X86/X86TargetTransformInfo.h            |  2
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp         | 14
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp         |  4
15 files changed, 69 insertions, 57 deletions
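The diff below replaces the raw unsigned alignment parameter of getMemoryOpCost (where 0 meant "unknown") with MaybeAlign across TargetTransformInfo and its target implementations. As a rough orientation aid only, here is a minimal C++ sketch of how the MaybeAlign/Align values threaded through the new code behave, assuming the llvm/Support/Alignment.h API at this revision; the two helper functions are hypothetical names, not code from this commit.

// Sketch, not part of the commit: MaybeAlign conventions assumed by the diff.
#include "llvm/Support/Alignment.h"
#include <cstdint>

using llvm::Align;
using llvm::MaybeAlign;

// MaybeAlign(0) is empty ("alignment unknown"); any other value wraps a
// power-of-two Align, so *A is always valid once A is known to be non-empty.
static bool isUnderAlignedFor128BitStore(MaybeAlign A) {
  return !A || *A < Align(16); // mirrors the AArch64 check in this diff
}

// valueOrOne() treats an unknown alignment as 1 byte, which is how the
// Hexagon cost code bounds its AlignWidth computation after this change.
static uint64_t alignWidthInBits(MaybeAlign A) {
  return 8 * A.valueOrOne().value();
}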
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c9c294873ea..e505b448bb7 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -639,7 +639,7 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
 }
 
 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                         unsigned Alignment,
+                                         MaybeAlign Alignment,
                                          unsigned AddressSpace,
                                          const Instruction *I) const {
   assert ((I == nullptr || I->getOpcode() == Opcode) &&
@@ -1201,14 +1201,14 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
     const StoreInst *SI = cast<StoreInst>(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return getMemoryOpCost(I->getOpcode(), ValTy,
-                           SI->getAlignment(),
-                           SI->getPointerAddressSpace(), I);
+                           MaybeAlign(SI->getAlignment()),
+                           SI->getPointerAddressSpace(), I);
   }
   case Instruction::Load: {
     const LoadInst *LI = cast<LoadInst>(I);
     return getMemoryOpCost(I->getOpcode(), I->getType(),
-                           LI->getAlignment(),
-                           LI->getPointerAddressSpace(), I);
+                           MaybeAlign(LI->getAlignment()),
+                           LI->getPointerAddressSpace(), I);
   }
   case Instruction::ZExt:
   case Instruction::SExt:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index dc916a7b340..908d72dbfc3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -632,12 +632,12 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 }
 
 int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
-                                    unsigned Alignment, unsigned AddressSpace,
+                                    MaybeAlign Alignment, unsigned AddressSpace,
                                     const Instruction *I) {
   auto LT = TLI->getTypeLegalizationCost(DL, Ty);
 
   if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
-      LT.second.is128BitVector() && Alignment < 16) {
+      LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
     // Unaligned stores are extremely inefficient. We don't split all
     // unaligned 128-bit stores because the negative impact that has shown in
     // practice on inlined block copy code.
@@ -703,8 +703,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
     if (!I->isVectorTy())
      continue;
    if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
-      Cost += getMemoryOpCost(Instruction::Store, I, 128, 0) +
-              getMemoryOpCost(Instruction::Load, I, 128, 0);
+      Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
+              getMemoryOpCost(Instruction::Load, I, Align(128), 0);
   }
   return Cost;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 32c59f41e1c..e85350cb6d4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -134,7 +134,7 @@ public:
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
 
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 86c8684d14d..ed1d6e5ca36 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -735,11 +735,13 @@ int ARMTTIImpl::getArithmeticInstrCost(
   return BaseCost;
 }
 
-int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace, const Instruction *I) {
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                MaybeAlign Alignment, unsigned AddressSpace,
+                                const Instruction *I) {
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
 
-  if (ST->hasNEON() && Src->isVectorTy() && Alignment != 16 &&
+  if (ST->hasNEON() && Src->isVectorTy() &&
+      (Alignment && *Alignment != Align(16)) &&
       Src->getVectorElementType()->isDoubleTy()) {
     // Unaligned loads/stores are extremely inefficient.
     // We need 4 uops for vst.1/vld.1 vs 1uop for vldr/vstr.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a878fdcfe3c..c4e1a17d80c 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -189,7 +189,7 @@ public:
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>());
 
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index ddbc5543348..b35e0e15cb7 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -152,7 +152,9 @@ unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
 }
 
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-      unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+                                         MaybeAlign Alignment,
+                                         unsigned AddressSpace,
+                                         const Instruction *I) {
   assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
   if (Opcode == Instruction::Store)
     return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
@@ -166,24 +168,30 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
       // Cost of HVX loads.
       if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
-      // Cost of constructing HVX vector from scalar loads.
-      Alignment = std::min(Alignment, RegWidth / 8);
-      unsigned AlignWidth = 8 * std::max(1u, Alignment);
+      // Cost of constructing HVX vector from scalar loads
+      const Align RegAlign(RegWidth / 8);
+      if (!Alignment || *Alignment > RegAlign)
+        Alignment = RegAlign;
+      assert(Alignment);
+      unsigned AlignWidth = 8 * Alignment->value();
       unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
       return 3 * NumLoads;
     }
 
     // Non-HVX vectors.
     // Add extra cost for floating point types.
-    unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
-                                                                 : 1;
-    Alignment = std::min(Alignment, 8u);
-    unsigned AlignWidth = 8 * std::max(1u, Alignment);
+    unsigned Cost =
+        VecTy->getElementType()->isFloatingPointTy() ? FloatFactor : 1;
+
+    // At this point unspecified alignment is considered as Align::None().
+    const Align BoundAlignment = std::min(Alignment.valueOrOne(), Align(8));
+    unsigned AlignWidth = 8 * BoundAlignment.value();
     unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
-    if (Alignment == 4 || Alignment == 8)
+    if (Alignment == Align(4) || Alignment == Align(8))
       return Cost * NumLoads;
     // Loads of less than 32 bits will need extra inserts to compose a vector.
-    unsigned LogA = Log2_32(Alignment);
+    assert(BoundAlignment <= Align(8));
+    unsigned LogA = Log2(BoundAlignment);
     return (3 - LogA) * Cost * NumLoads;
   }
 
@@ -214,7 +222,8 @@ unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
     return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                              Alignment, AddressSpace,
                                              UseMaskForCond, UseMaskForGaps);
-  return getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, nullptr);
+  return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace,
+                         nullptr);
 }
 
 unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 12ede503af8..38d5bd969cf 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -112,8 +112,9 @@ public:
             unsigned ScalarizationCostPassed = UINT_MAX);
   unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
                                      const SCEV *S);
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-      unsigned AddressSpace, const Instruction *I = nullptr);
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
+                           unsigned AddressSpace,
+                           const Instruction *I = nullptr);
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
       unsigned AddressSpace);
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f51300c656a..53c2f0f88d1 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -829,8 +829,9 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   return Cost;
 }
 
-int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace, const Instruction *I) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                MaybeAlign Alignment, unsigned AddressSpace,
+                                const Instruction *I) {
   // Legalize the type.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -888,7 +889,8 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
   // to be decomposed based on the alignment factor.
 
   // Add the cost of each scalar load or store.
-  Cost += LT.first*(SrcBytes/Alignment-1);
+  assert(Alignment);
+  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
 
   // For a vector type, there is also scalarization overhead (only for
   // stores, loads are expanded using the vector-load + permutation sequence,
@@ -919,7 +921,8 @@ int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
 
   // Firstly, the cost of load/store operation.
-  int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+  int Cost =
+      getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
 
   // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
   // (at least in the sense that there need only be one non-loop-invariant
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 83a70364bf6..76cd1c1ea08 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -97,7 +97,7 @@ public:
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 11c99aa1117..32eebf0c88c 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -259,7 +259,7 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
       }
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
-        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
+        NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0);
       }
     }
 
@@ -995,7 +995,7 @@ static bool isBswapIntrinsicCall(const Value *V) {
 }
 
 int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                    unsigned Alignment, unsigned AddressSpace,
+                                    MaybeAlign Alignment, unsigned AddressSpace,
                                     const Instruction *I) {
   assert(!Src->isVoidTy() && "Invalid type");
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 3ba80b31439..c246ca46206 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -87,7 +87,7 @@ public:
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
   bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 70fd857fcf0..76d95eda547 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2404,8 +2404,9 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
 }
 
-int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                unsigned AddressSpace, const Instruction *I) {
+int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                MaybeAlign Alignment, unsigned AddressSpace,
+                                const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
   if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
     unsigned NumElem = VTy->getVectorNumElements();
@@ -2456,7 +2457,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
   VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
-    return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
+    return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace);
 
   unsigned NumElem = SrcVTy->getVectorNumElements();
   VectorType *MaskTy =
@@ -2474,7 +2475,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
     int ValueSplitCost = getScalarizationOverhead(SrcVTy, IsLoad, IsStore);
     int MemopCost =
         NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                         Alignment, AddressSpace);
+                                         MaybeAlign(Alignment), AddressSpace);
     return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
   }
 
@@ -3164,7 +3165,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
                         ? ST->getGatherOverhead()
                         : ST->getScatterOverhead();
   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                           Alignment, AddressSpace);
+                                           MaybeAlign(Alignment), AddressSpace);
 }
 
 /// Return the cost of full scalarization of gather / scatter operation.
@@ -3194,7 +3195,7 @@ int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
   // The cost of the scalar loads/stores.
   int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
-                                          Alignment, AddressSpace);
+                                          MaybeAlign(Alignment), AddressSpace);
 
   int InsertExtractCost = 0;
   if (Opcode == Instruction::Load)
@@ -3520,8 +3521,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
   // Get the cost of one memory operation.
   Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
                                         LegalVT.getVectorNumElements());
-  unsigned MemOpCost =
-      getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+  unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
+                                       MaybeAlign(Alignment), AddressSpace);
 
   VectorType *VT = VectorType::get(ScalarTy, VF);
   EVT ETy = TLI->getValueType(DL, VT);
@@ -3620,8 +3621,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
   // Get the cost of one memory operation.
   Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
                                         LegalVT.getVectorNumElements());
-  unsigned MemOpCost =
-      getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+  unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
+                                       MaybeAlign(Alignment), AddressSpace);
 
   unsigned VF = VecTy->getVectorNumElements() / Factor;
   MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 7581257f41f..e0adaabafb0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -133,7 +133,7 @@ public:
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace, const Instruction *I = nullptr);
   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                             unsigned AddressSpace);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8f0bf70f873..e6c481f81ee 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5746,7 +5746,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
   // vectorized loop where the user of it is a vectorized instruction.
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
-                                   Alignment ? Alignment->value() : 0, AS);
+                                   Alignment, AS);
 
   // Get the overhead of the extractelement and insertelement instructions
   // we might create due to scalarization.
@@ -5783,8 +5783,7 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
     Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
                                       Alignment ? Alignment->value() : 0, AS);
   else
-    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy,
-                                Alignment ? Alignment->value() : 0, AS, I);
+    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
 
   bool Reverse = ConsecutiveStride < 0;
   if (Reverse)
@@ -5800,16 +5799,14 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
   unsigned AS = getLoadStoreAddressSpace(I);
   if (isa<LoadInst>(I)) {
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(Instruction::Load, ValTy,
-                               Alignment ? Alignment->value() : 0, AS) +
+           TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
            TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
   }
   StoreInst *SI = cast<StoreInst>(I);
 
   bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
   return TTI.getAddressComputationCost(ValTy) +
-         TTI.getMemoryOpCost(Instruction::Store, ValTy,
-                             Alignment ? Alignment->value() : 0, AS) +
+         TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) +
          (isLoopInvariantStoreValue
               ? 0
               : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy,
@@ -5877,8 +5874,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
     unsigned AS = getLoadStoreAddressSpace(I);
 
     return TTI.getAddressComputationCost(ValTy) +
-           TTI.getMemoryOpCost(I->getOpcode(), ValTy,
-                               Alignment ? Alignment->value() : 0, AS, I);
+           TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
   }
   return getWideningCost(I, VF);
 }
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2a3802262e9..5e4ba924585 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3162,7 +3162,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     }
     case Instruction::Load: {
       // Cost of wide load - cost of scalar loads.
-      unsigned alignment = cast<LoadInst>(VL0)->getAlignment();
+      MaybeAlign alignment(cast<LoadInst>(VL0)->getAlignment());
       int ScalarEltCost =
           TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
       if (NeedToShuffleReuses) {
@@ -3180,7 +3180,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
     }
     case Instruction::Store: {
       // We know that we can merge the stores. Calculate the cost.
-      unsigned alignment = cast<StoreInst>(VL0)->getAlignment();
+      MaybeAlign alignment(cast<StoreInst>(VL0)->getAlignment());
       int ScalarEltCost =
           TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
       if (NeedToShuffleReuses) {
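For orientation, a hedged sketch of the call-site pattern the vectorizer hunks above converge on: the IR alignment (0 when absent) is wrapped in MaybeAlign at the TTI boundary instead of being passed as a bare integer. This is illustrative only and not code from the patch; costOfStore is a hypothetical helper.

// Sketch, not from the patch: wrapping a possibly-zero IR alignment when
// querying the cost model after this change.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"

static int costOfStore(const llvm::TargetTransformInfo &TTI,
                       const llvm::StoreInst *SI, llvm::Type *ValTy,
                       unsigned AS) {
  // getAlignment() returns 0 when no alignment was recorded; MaybeAlign(0)
  // turns that into "unknown" rather than a literal zero reaching the
  // target cost code.
  return TTI.getMemoryOpCost(llvm::Instruction::Store, ValTy,
                             llvm::MaybeAlign(SI->getAlignment()), AS, SI);
}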