summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp89
1 files changed, 85 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index abd19391c0c..fe1e978a20d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -15,6 +15,8 @@
//
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -916,6 +918,72 @@ bool LoopVectorizationLegality::blockCanBePredicated(
return true;
}
+/// Return true if we can prove that the given load would access only
+/// dereferenceable memory, and be properly aligned on every iteration.
+/// (i.e. does not require predication beyond that required by the the header
+/// itself) TODO: Move to Loads.h/cpp in a separate change
+static bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+ ScalarEvolution &SE,
+ DominatorTree &DT) {
+ auto &DL = LI->getModule()->getDataLayout();
+ Value *Ptr = LI->getPointerOperand();
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+ if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
+ return false;
+ auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
+ if (!Step)
+ return false;
+ APInt StepC = Step->getAPInt();
+ APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+ DL.getTypeStoreSize(LI->getType()));
+ // TODO: generalize to access patterns which have gaps
+ // TODO: handle uniform addresses (if not already handled by LICM)
+ if (StepC != EltSize)
+ return false;
+
+ // TODO: If the symbolic trip count has a small bound (max count), we might
+ // be able to prove safety.
+ auto TC = SE.getSmallConstantTripCount(L);
+ if (!TC)
+ return false;
+
+ const APInt AccessSize = TC * EltSize;
+
+ auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart());
+ if (!StartS)
+ return false;
+ assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition");
+ Value *Base = StartS->getValue();
+
+ Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
+
+ unsigned Align = LI->getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(LI->getType());
+ // For the moment, restrict ourselves to the case where the access size is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case found which warrants
+ if (EltSize.urem(Align) != 0)
+ return false;
+ return isDereferenceableAndAlignedPointer(Base, Align, AccessSize,
+ DL, HeaderFirstNonPHI, &DT);
+}
+
+/// Return true if speculation of the given load must be suppressed for
+/// correctness reasons. If not suppressed, dereferenceability and alignment
+/// must be proven.
+/// TODO: Move to ValueTracking.h/cpp in a separate change
+static bool mustSuppressSpeculation(const LoadInst &LI) {
+ if (!LI.isUnordered())
+ return true;
+ const Function &F = *LI.getFunction();
+ // Speculative load may create a race that did not exist in the source.
+ return F.hasFnAttribute(Attribute::SanitizeThread) ||
+ // Speculative load may load data from dirty regions.
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress);
+}
+
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
reportVectorizationFailure("If-conversion is disabled",
@@ -936,12 +1004,25 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
// Collect safe addresses.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (blockNeedsPredication(BB))
+ if (!blockNeedsPredication(BB)) {
+ for (Instruction &I : *BB)
+ if (auto *Ptr = getLoadStorePointerOperand(&I))
+ SafePointes.insert(Ptr);
continue;
+ }
- for (Instruction &I : *BB)
- if (auto *Ptr = getLoadStorePointerOperand(&I))
- SafePointes.insert(Ptr);
+ // For a block which requires predication, a address may be safe to access
+ // in the loop w/o predication if we can prove dereferenceability facts
+ // sufficient to ensure it'll never fault within the loop. For the moment,
+ // we restrict this to loads; stores are more complicated due to
+ // concurrency restrictions.
+ ScalarEvolution &SE = *PSE.getSE();
+ for (Instruction &I : *BB) {
+ LoadInst *LI = dyn_cast<LoadInst>(&I);
+ if (LI && !mustSuppressSpeculation(*LI) &&
+ isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
+ SafePointes.insert(LI->getPointerOperand());
+ }
}
// Collect the blocks that need predication.
OpenPOWER on IntegriCloud