diff options
author | Warren Ristow <warren.ristow@sony.com> | 2019-06-17 17:20:08 +0000 |
---|---|---|
committer | Warren Ristow <warren.ristow@sony.com> | 2019-06-17 17:20:08 +0000 |
commit | 6452bdd29b5a0bd0f902c06046add459f910d335 (patch) | |
tree | 73dbbcd0c95a8c83663cfc29775628a019280b09 /llvm/lib | |
parent | 3e140066bce1cbc40604274bd99a0cc2efed01f5 (diff) | |
download | bcm5719-llvm-6452bdd29b5a0bd0f902c06046add459f910d335.tar.gz bcm5719-llvm-6452bdd29b5a0bd0f902c06046add459f910d335.zip |
[LV] Suppress vectorization in some nontemporal cases
When considering a loop containing nontemporal stores or loads for
vectorization, suppress the vectorization if the corresponding
vectorized store or load with the aligment of the original scaler
memory op is not supported with the nontemporal hint on the target.
This adds two new functions:
bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
to TTI, leaving the target independent default implementation as
returning true, but with overriding implementations for X86 that
check the legality based on available Subtarget features.
This fixes https://llvm.org/PR40759
Differential Revision: https://reviews.llvm.org/D61764
llvm-svn: 363581
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 |
5 files changed, 80 insertions, 1 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 763b6841878..a55c1be1a09 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -183,6 +183,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { return TTIImpl->isLegalMaskedLoad(DataType); } +bool TargetTransformInfo::isLegalNTStore(Type *DataType, + unsigned Alignment) const { + return TTIImpl->isLegalNTStore(DataType, Alignment); +} + +bool TargetTransformInfo::isLegalNTLoad(Type *DataType, + unsigned Alignment) const { + return TTIImpl->isLegalNTLoad(DataType, Alignment); +} + bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2b9a61d4c87..08e46ed2ce3 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3143,6 +3143,41 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { return isLegalMaskedLoad(DataType); } +bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) { + unsigned DataSize = DL.getTypeStoreSize(DataType); + // The only supported nontemporal loads are for aligned vectors of 16 or 32 + // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2 + // (the equivalent stores only require AVX). + if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32)) + return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2(); + + return false; +} + +bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) { + unsigned DataSize = DL.getTypeStoreSize(DataType); + + // SSE4A supports nontemporal stores of float and double at arbitrary + // alignment. + if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy())) + return true; + + // Besides the SSE4A subtarget exception above, only aligned stores are + // available nontemporaly on any other subtarget. And only stores with a size + // of 4..32 bytes (powers of 2, only) are permitted. + if (Alignment < DataSize || DataSize < 4 || DataSize > 32 || + !isPowerOf2_32(DataSize)) + return false; + + // 32-byte vector nontemporal stores are supported by AVX (the equivalent + // loads require AVX2). + if (DataSize == 32) + return ST->hasAVX(); + else if (DataSize == 16) + return ST->hasSSE1(); + return true; +} + bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { if (!isa<VectorType>(DataTy)) return false; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 351a4f22060..f43155e3838 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -186,6 +186,8 @@ public: bool canMacroFuseCmp(); bool isLegalMaskedLoad(Type *DataType); bool isLegalMaskedStore(Type *DataType); + bool isLegalNTLoad(Type *DataType, unsigned Alignment); + bool isLegalNTStore(Type *DataType, unsigned Alignment); bool isLegalMaskedGather(Type *DataType); bool isLegalMaskedScatter(Type *DataType); bool isLegalMaskedExpandLoad(Type *DataType); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index e5713c4355f..6ef8dc2d3cd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -767,6 +767,38 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } + // For nontemporal stores, check that a nontemporal vector version is + // supported on the target. + if (ST->getMetadata(LLVMContext::MD_nontemporal)) { + // Arbitrarily try a vector of 2 elements. + Type *VecTy = VectorType::get(T, /*NumElements=*/2); + assert(VecTy && "did not find vectorized version of stored type"); + unsigned Alignment = getLoadStoreAlignment(ST); + if (!TTI->isLegalNTStore(VecTy, Alignment)) { + reportVectorizationFailure( + "nontemporal store instruction cannot be vectorized", + "nontemporal store instruction cannot be vectorized", + "CantVectorizeNontemporalStore", ST); + return false; + } + } + + } else if (auto *LD = dyn_cast<LoadInst>(&I)) { + if (LD->getMetadata(LLVMContext::MD_nontemporal)) { + // For nontemporal loads, check that a nontemporal vector version is + // supported on the target (arbitrarily try a vector of 2 elements). + Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); + assert(VecTy && "did not find vectorized version of load type"); + unsigned Alignment = getLoadStoreAlignment(LD); + if (!TTI->isLegalNTLoad(VecTy, Alignment)) { + reportVectorizationFailure( + "nontemporal load instruction cannot be vectorized", + "nontemporal load instruction cannot be vectorized", + "CantVectorizeNontemporalLoad", LD); + return false; + } + } + // FP instructions can allow unsafe algebra, thus vectorizable by // non-IEEE-754 compliant SIMD units. // This applies to floating-point math operations and calls, not memory diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 01104f68bb3..7b3b9ddfad4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7275,7 +7275,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements(*ORE); - LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE, + LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE, &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); |