Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 89
1 file changed, 85 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index abd19391c0c..fe1e978a20d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -15,6 +15,8 @@
 //
 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/IntrinsicInst.h"
 
@@ -916,6 +918,72 @@ bool LoopVectorizationLegality::blockCanBePredicated(
   return true;
 }
 
+/// Return true if we can prove that the given load would access only
+/// dereferenceable memory, and be properly aligned on every iteration
+/// (i.e. does not require predication beyond that required by the header
+/// itself). TODO: Move to Loads.h/cpp in a separate change.
+static bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
+                                              ScalarEvolution &SE,
+                                              DominatorTree &DT) {
+  auto &DL = LI->getModule()->getDataLayout();
+  Value *Ptr = LI->getPointerOperand();
+  auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+  if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
+    return false;
+  auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
+  if (!Step)
+    return false;
+  APInt StepC = Step->getAPInt();
+  APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+                DL.getTypeStoreSize(LI->getType()));
+  // TODO: generalize to access patterns which have gaps
+  // TODO: handle uniform addresses (if not already handled by LICM)
+  if (StepC != EltSize)
+    return false;
+
+  // TODO: If the symbolic trip count has a small bound (max count), we might
+  // be able to prove safety.
+  auto TC = SE.getSmallConstantTripCount(L);
+  if (!TC)
+    return false;
+
+  const APInt AccessSize = TC * EltSize;
+
+  auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart());
+  if (!StartS)
+    return false;
+  assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition");
+  Value *Base = StartS->getValue();
+
+  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
+
+  unsigned Align = LI->getAlignment();
+  if (Align == 0)
+    Align = DL.getABITypeAlignment(LI->getType());
+  // For the moment, restrict ourselves to the case where the access size is a
+  // multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case is found which warrants it
+  if (EltSize.urem(Align) != 0)
+    return false;
+  return isDereferenceableAndAlignedPointer(Base, Align, AccessSize,
+                                            DL, HeaderFirstNonPHI, &DT);
+}
+
+/// Return true if speculation of the given load must be suppressed for
+/// correctness reasons. If not suppressed, dereferenceability and alignment
+/// must be proven.
+/// TODO: Move to ValueTracking.h/cpp in a separate change.
+static bool mustSuppressSpeculation(const LoadInst &LI) {
+  if (!LI.isUnordered())
+    return true;
+  const Function &F = *LI.getFunction();
+  // Speculative load may create a race that did not exist in the source.
+  return F.hasFnAttribute(Attribute::SanitizeThread) ||
+    // Speculative load may load data from dirty regions.
+    F.hasFnAttribute(Attribute::SanitizeAddress) ||
+    F.hasFnAttribute(Attribute::SanitizeHWAddress);
+}
+
 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
   if (!EnableIfConversion) {
     reportVectorizationFailure("If-conversion is disabled",
@@ -936,12 +1004,25 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
 
   // Collect safe addresses.
   for (BasicBlock *BB : TheLoop->blocks()) {
-    if (blockNeedsPredication(BB))
+    if (!blockNeedsPredication(BB)) {
+      for (Instruction &I : *BB)
+        if (auto *Ptr = getLoadStorePointerOperand(&I))
+          SafePointes.insert(Ptr);
       continue;
+    }
 
-    for (Instruction &I : *BB)
-      if (auto *Ptr = getLoadStorePointerOperand(&I))
-        SafePointes.insert(Ptr);
+    // For a block which requires predication, an address may be safe to access
+    // in the loop w/o predication if we can prove dereferenceability facts
+    // sufficient to ensure it'll never fault within the loop. For the moment,
+    // we restrict this to loads; stores are more complicated due to
+    // concurrency restrictions.
+    ScalarEvolution &SE = *PSE.getSE();
+    for (Instruction &I : *BB) {
+      LoadInst *LI = dyn_cast<LoadInst>(&I);
+      if (LI && !mustSuppressSpeculation(*LI) &&
+          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
+        SafePointes.insert(LI->getPointerOperand());
+    }
   }
 
   // Collect the blocks that need predication.
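
To make the legality conditions above concrete, here is a minimal sketch of the kind of source loop the patch targets (the function name and array sizes are invented for illustration; this is not code from the commit). The load of A[I] sits in a block that needs predication because of the branch on Pred[I], yet its address is a unit-stride affine AddRec (step == sizeof(int), matching EltSize), the trip count is a compile-time constant visible to SCEV, and the array type makes the entire AccessSize = 1024 * 4 bytes dereferenceable and 4-byte aligned from the function entry, so isDereferenceableAndAlignedInLoop can succeed:

  // Hypothetical C++ source, for illustration only: the guarded load of A[I]
  // lives in a block that requires predication, but every iteration's address
  // is provably in-bounds and aligned, so the patch lets the vectorizer treat
  // the load as safe to execute speculatively instead of requiring masking.
  int SumSelected(const int (&A)[1024], const bool (&Pred)[1024]) {
    int Sum = 0;
    for (int I = 0; I < 1024; ++I) // constant trip count visible to SCEV
      if (Pred[I])                 // makes A[I]'s block need predication
        Sum += A[I];               // unit-stride load; base dereferenceable
                                   // for all 1024 iterations
    return Sum;
  }

With this change, such a pointer lands in SafePointes even though its block needs predication; previously it was collected only from unpredicated blocks, so the guarded load would have had to be masked (or would have blocked if-conversion).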

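A note on mustSuppressSpeculation: the following is a hypothetical illustration (again, not from the commit) of why functions built with sanitizers are excluded. The source program below is race-free, because every access to Data is guarded by an acquire load of Ready; executing the load of Data[I] speculatively, i.e. unconditionally, would perform an unsynchronized read that ThreadSanitizer could legitimately report, manufacturing exactly the "race that did not exist in the source" named in the patch comment:

  #include <atomic>

  // Hypothetical illustration: a producer thread fills Data[0..1024) and then
  // sets Ready with release ordering. The program only reads Data after the
  // acquire load observes Ready == true, so there is no race as written.
  // If the vectorizer hoisted the load of Data[I] above the guard, TSan could
  // observe an unsynchronized read while the producer is still writing.
  extern int Data[1024];
  extern std::atomic<bool> Ready;

  int Consume() {
    int Sum = 0;
    for (int I = 0; I < 1024; ++I)
      if (Ready.load(std::memory_order_acquire)) // guards every Data access
        Sum += Data[I];
    return Sum;
  }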
