diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 42 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp | 81 |
4 files changed, 79 insertions, 48 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index df77ccfd525..7f225a59258 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -251,10 +251,6 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } -unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { - return TTIImpl->getLoadStoreVecRegBitWidth(AS); -} - unsigned TargetTransformInfo::getCacheLineSize() const { return TTIImpl->getCacheLineSize(); } @@ -423,6 +419,44 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller, return TTIImpl->areInlineCompatible(Caller, Callee); } +unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const { + return TTIImpl->getLoadStoreVecRegBitWidth(AS); +} + +bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const { + return TTIImpl->isLegalToVectorizeLoad(LI); +} + +bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const { + return TTIImpl->isLegalToVectorizeStore(SI); +} + +bool TargetTransformInfo::isLegalToVectorizeLoadChain( + unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { + return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, + AddrSpace); +} + +bool TargetTransformInfo::isLegalToVectorizeStoreChain( + unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const { + return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, + AddrSpace); +} + +unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF, + unsigned LoadSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); +} + +unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, + unsigned StoreSize, + unsigned ChainSizeInBytes, + VectorType *VecTy) const { + return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 3d630fe3ea9..e328e0c5a43 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -80,7 +80,7 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { return Vector ? 0 : 32; } -unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) { +unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { switch (AddrSpace) { case AMDGPUAS::GLOBAL_ADDRESS: case AMDGPUAS::CONSTANT_ADDRESS: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index a82a0745808..8c8be63b53b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -82,7 +82,7 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); - unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace); + unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; unsigned getMaxInterleaveFactor(unsigned VF); int getArithmeticInstrCost( diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 6677291ad42..ead17d01a96 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -429,10 +429,13 @@ void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) { std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>> Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits) { - unsigned ElemSizeInBytes = ElementSizeBits / 8; - unsigned SizeInBytes = ElemSizeInBytes * Chain.size(); - unsigned NumRight = (SizeInBytes % 4) / ElemSizeInBytes; - unsigned NumLeft = Chain.size() - NumRight; + unsigned ElementSizeBytes = ElementSizeBits / 8; + unsigned SizeBytes = ElementSizeBytes * Chain.size(); + unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes; + if (NumLeft == Chain.size()) + --NumLeft; + else if (NumLeft == 0) + NumLeft = 1; return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft)); } @@ -540,6 +543,10 @@ Vectorizer::collectInstructions(BasicBlock *BB) { if (!LI->isSimple()) continue; + // Skip if it's not legal. + if (!TTI.isLegalToVectorizeLoad(LI)) + continue; + Type *Ty = LI->getType(); if (!VectorType::isValidElementType(Ty->getScalarType())) continue; @@ -565,8 +572,6 @@ Vectorizer::collectInstructions(BasicBlock *BB) { })) continue; - // TODO: Target hook to filter types. - // Save the load locations. Value *ObjPtr = GetUnderlyingObject(Ptr, DL); LoadRefs[ObjPtr].push_back(LI); @@ -575,6 +580,10 @@ Vectorizer::collectInstructions(BasicBlock *BB) { if (!SI->isSimple()) continue; + // Skip if it's not legal. + if (!TTI.isLegalToVectorizeStore(SI)) + continue; + Type *Ty = SI->getValueOperand()->getType(); if (!VectorType::isValidElementType(Ty->getScalarType())) continue; @@ -719,6 +728,7 @@ bool Vectorizer::vectorizeStoreChain( unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS); unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); + unsigned Alignment = getAlignment(S0); if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) { InstructionsProcessed->insert(Chain.begin(), Chain.end()); @@ -741,17 +751,11 @@ bool Vectorizer::vectorizeStoreChain( Chain = NewChain; ChainSize = Chain.size(); - // Store size should be 1B, 2B or multiple of 4B. - // TODO: Target hook for size constraint? + // Check if it's legal to vectorize this chain. If not, split the chain and + // try again. unsigned EltSzInBytes = Sz / 8; unsigned SzInBytes = EltSzInBytes * ChainSize; - if (SzInBytes > 2 && SzInBytes % 4 != 0) { - DEBUG(dbgs() << "LSV: Size should be 1B, 2B " - "or multiple of 4B. Splitting.\n"); - if (SzInBytes == 3) - return vectorizeStoreChain(Chain.slice(0, ChainSize - 1), - InstructionsProcessed); - + if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) { auto Chains = splitOddVectorElts(Chain, Sz); return vectorizeStoreChain(Chains.first, InstructionsProcessed) | vectorizeStoreChain(Chains.second, InstructionsProcessed); @@ -765,13 +769,15 @@ bool Vectorizer::vectorizeStoreChain( else VecTy = VectorType::get(StoreTy, Chain.size()); - // If it's more than the max vector size, break it into two pieces. - // TODO: Target hook to control types to split to. - if (ChainSize > VF) { - DEBUG(dbgs() << "LSV: Vector factor is too big." + // If it's more than the max vector size or the target has a better + // vector factor, break it into two pieces. + unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy); + if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) { + DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor." " Creating two separate arrays.\n"); - return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) | - vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed); + return vectorizeStoreChain(Chain.slice(0, TargetVF), + InstructionsProcessed) | + vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed); } DEBUG({ @@ -784,9 +790,6 @@ bool Vectorizer::vectorizeStoreChain( // whether we succeed below. InstructionsProcessed->insert(Chain.begin(), Chain.end()); - // Check alignment restrictions. - unsigned Alignment = getAlignment(S0); - // If the store is going to be misaligned, don't vectorize it. if (accessIsMisaligned(SzInBytes, AS, Alignment)) { if (S0->getPointerAddressSpace() != 0) @@ -873,6 +876,7 @@ bool Vectorizer::vectorizeLoadChain( unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS); unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); + unsigned Alignment = getAlignment(L0); if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) { InstructionsProcessed->insert(Chain.begin(), Chain.end()); @@ -895,16 +899,11 @@ bool Vectorizer::vectorizeLoadChain( Chain = NewChain; ChainSize = Chain.size(); - // Load size should be 1B, 2B or multiple of 4B. - // TODO: Should size constraint be a target hook? + // Check if it's legal to vectorize this chain. If not, split the chain and + // try again. unsigned EltSzInBytes = Sz / 8; unsigned SzInBytes = EltSzInBytes * ChainSize; - if (SzInBytes > 2 && SzInBytes % 4 != 0) { - DEBUG(dbgs() << "LSV: Size should be 1B, 2B " - "or multiple of 4B. Splitting.\n"); - if (SzInBytes == 3) - return vectorizeLoadChain(Chain.slice(0, ChainSize - 1), - InstructionsProcessed); + if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) { auto Chains = splitOddVectorElts(Chain, Sz); return vectorizeLoadChain(Chains.first, InstructionsProcessed) | vectorizeLoadChain(Chains.second, InstructionsProcessed); @@ -918,22 +917,20 @@ bool Vectorizer::vectorizeLoadChain( else VecTy = VectorType::get(LoadTy, Chain.size()); - // If it's more than the max vector size, break it into two pieces. - // TODO: Target hook to control types to split to. - if (ChainSize > VF) { - DEBUG(dbgs() << "LSV: Vector factor is too big. " - "Creating two separate arrays.\n"); - return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) | - vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed); + // If it's more than the max vector size or the target has a better + // vector factor, break it into two pieces. + unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy); + if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) { + DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor." + " Creating two separate arrays.\n"); + return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) | + vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed); } // We won't try again to vectorize the elements of the chain, regardless of // whether we succeed below. InstructionsProcessed->insert(Chain.begin(), Chain.end()); - // Check alignment restrictions. - unsigned Alignment = getAlignment(L0); - // If the load is going to be misaligned, don't vectorize it. if (accessIsMisaligned(SzInBytes, AS, Alignment)) { if (L0->getPointerAddressSpace() != 0) |