summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp10
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp35
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h2
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp32
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2
5 files changed, 80 insertions, 1 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 763b6841878..a55c1be1a09 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -183,6 +183,16 @@ bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedLoad(DataType);
}
+bool TargetTransformInfo::isLegalNTStore(Type *DataType,
+ unsigned Alignment) const {
+ return TTIImpl->isLegalNTStore(DataType, Alignment);
+}
+
+bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
+ unsigned Alignment) const {
+ return TTIImpl->isLegalNTLoad(DataType, Alignment);
+}
+
bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
return TTIImpl->isLegalMaskedGather(DataType);
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 2b9a61d4c87..08e46ed2ce3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3143,6 +3143,41 @@ bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
return isLegalMaskedLoad(DataType);
}
+bool X86TTIImpl::isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+ // The only supported nontemporal loads are for aligned vectors of 16 or 32
+ // bytes. Note that 32-byte nontemporal vector loads are supported by AVX2
+ // (the equivalent stores only require AVX).
+ if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32))
+ return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2();
+
+ return false;
+}
+
+bool X86TTIImpl::isLegalNTStore(Type *DataType, unsigned Alignment) {
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+
+ // SSE4A supports nontemporal stores of float and double at arbitrary
+ // alignment.
+ if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy()))
+ return true;
+
+ // Besides the SSE4A subtarget exception above, only aligned stores are
+ // available nontemporaly on any other subtarget. And only stores with a size
+ // of 4..32 bytes (powers of 2, only) are permitted.
+ if (Alignment < DataSize || DataSize < 4 || DataSize > 32 ||
+ !isPowerOf2_32(DataSize))
+ return false;
+
+ // 32-byte vector nontemporal stores are supported by AVX (the equivalent
+ // loads require AVX2).
+ if (DataSize == 32)
+ return ST->hasAVX();
+ else if (DataSize == 16)
+ return ST->hasSSE1();
+ return true;
+}
+
bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) {
if (!isa<VectorType>(DataTy))
return false;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 351a4f22060..f43155e3838 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -186,6 +186,8 @@ public:
bool canMacroFuseCmp();
bool isLegalMaskedLoad(Type *DataType);
bool isLegalMaskedStore(Type *DataType);
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment);
+ bool isLegalNTStore(Type *DataType, unsigned Alignment);
bool isLegalMaskedGather(Type *DataType);
bool isLegalMaskedScatter(Type *DataType);
bool isLegalMaskedExpandLoad(Type *DataType);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index e5713c4355f..6ef8dc2d3cd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -767,6 +767,38 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
+ // For nontemporal stores, check that a nontemporal vector version is
+ // supported on the target.
+ if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
+ // Arbitrarily try a vector of 2 elements.
+ Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+ assert(VecTy && "did not find vectorized version of stored type");
+ unsigned Alignment = getLoadStoreAlignment(ST);
+ if (!TTI->isLegalNTStore(VecTy, Alignment)) {
+ reportVectorizationFailure(
+ "nontemporal store instruction cannot be vectorized",
+ "nontemporal store instruction cannot be vectorized",
+ "CantVectorizeNontemporalStore", ST);
+ return false;
+ }
+ }
+
+ } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
+ if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
+ // For nontemporal loads, check that a nontemporal vector version is
+ // supported on the target (arbitrarily try a vector of 2 elements).
+ Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+ assert(VecTy && "did not find vectorized version of load type");
+ unsigned Alignment = getLoadStoreAlignment(LD);
+ if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
+ reportVectorizationFailure(
+ "nontemporal load instruction cannot be vectorized",
+ "nontemporal load instruction cannot be vectorized",
+ "CantVectorizeNontemporalLoad", LD);
+ return false;
+ }
+ }
+
// FP instructions can allow unsafe algebra, thus vectorizable by
// non-IEEE-754 compliant SIMD units.
// This applies to floating-point math operations and calls, not memory
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 01104f68bb3..7b3b9ddfad4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7275,7 +7275,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements(*ORE);
- LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, GetLAA, LI, ORE,
+ LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE,
&Requirements, &Hints, DB, AC);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
OpenPOWER on IntegriCloud