diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 |
5 files changed, 80 insertions, 1 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 763b6841878..a55c1be1a09 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -183,6 +183,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { return TTIImpl->isLegalMaskedLoad(DataType); } +bool TargetTransformInfo::isLegalNTStore(Type *DataType, + unsigned Alignment) const { + return TTIImpl->isLegalNTStore(DataType, Alignment); +} + +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, + unsigned Alignment) const { + return TTIImpl->isLegalNTLoad(DataType, Alignment); +} + bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2b9a61d4c87..08e46ed2ce3 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3143,6 +3143,41 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { return isLegalMaskedLoad(DataType); } +bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) { + unsigned DataSize = DL.getTypeStoreSize(DataType); + // The only supported nontemporal loads are for aligned vectors of 16 or 32 + // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2 + // (the equivalent stores only require AVX). + if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32)) + return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2(); + + return false; +} + +bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) { + unsigned DataSize = DL.getTypeStoreSize(DataType); + + // SSE4A supports nontemporal stores of float and double at arbitrary + // alignment. 
+ if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy())) + return true; + + // Besides the SSE4A subtarget exception above, only aligned stores are + // available nontemporally on any other subtarget. And only stores with a size + // of 4..32 bytes (powers of 2, only) are permitted. + if (Alignment < DataSize || DataSize < 4 || DataSize > 32 || + !isPowerOf2_32(DataSize)) + return false; + + // 32-byte vector nontemporal stores are supported by AVX (the equivalent + // loads require AVX2). + if (DataSize == 32) + return ST->hasAVX(); + else if (DataSize == 16) + return ST->hasSSE1(); + return true; +} + bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { if (!isa<VectorType>(DataTy)) return false; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 351a4f22060..f43155e3838 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -186,6 +186,8 @@ public: bool canMacroFuseCmp(); bool isLegalMaskedLoad(Type *DataType); bool isLegalMaskedStore(Type *DataType); + bool isLegalNTLoad(Type *DataType, unsigned Alignment); + bool isLegalNTStore(Type *DataType, unsigned Alignment); bool isLegalMaskedGather(Type *DataType); bool isLegalMaskedScatter(Type *DataType); bool isLegalMaskedExpandLoad(Type *DataType); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index e5713c4355f..6ef8dc2d3cd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -767,6 +767,38 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } + // For nontemporal stores, check that a nontemporal vector version is + // supported on the target. + if (ST->getMetadata(LLVMContext::MD_nontemporal)) { + // Arbitrarily try a vector of 2 elements. 
+ Type *VecTy = VectorType::get(T, /*NumElements=*/2); + assert(VecTy && "did not find vectorized version of stored type"); + unsigned Alignment = getLoadStoreAlignment(ST); + if (!TTI->isLegalNTStore(VecTy, Alignment)) { + reportVectorizationFailure( + "nontemporal store instruction cannot be vectorized", + "nontemporal store instruction cannot be vectorized", + "CantVectorizeNontemporalStore", ST); + return false; + } + } + + } else if (auto *LD = dyn_cast<LoadInst>(&I)) { + if (LD->getMetadata(LLVMContext::MD_nontemporal)) { + // For nontemporal loads, check that a nontemporal vector version is + // supported on the target (arbitrarily try a vector of 2 elements). + Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); + assert(VecTy && "did not find vectorized version of load type"); + unsigned Alignment = getLoadStoreAlignment(LD); + if (!TTI->isLegalNTLoad(VecTy, Alignment)) { + reportVectorizationFailure( + "nontemporal load instruction cannot be vectorized", + "nontemporal load instruction cannot be vectorized", + "CantVectorizeNontemporalLoad", LD); + return false; + } + } + // FP instructions can allow unsafe algebra, thus vectorizable by // non-IEEE-754 compliant SIMD units. // This applies to floating-point math operations and calls, not memory diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 01104f68bb3..7b3b9ddfad4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7275,7 +7275,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check if it is legal to vectorize the loop. 
LoopVectorizationRequirements Requirements(*ORE); - LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE, + LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE, &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); |