diff options
| -rw-r--r-- | llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 142 | ||||
| -rw-r--r-- | llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll | 78 | 
2 files changed, 33 insertions, 187 deletions
| diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 921bec8f4de..c0738a951c4 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -239,24 +239,6 @@ static bool isRemovable(Instruction *I) {    }  } - -/// isShortenable - Returns true if this instruction can be safely shortened in -/// length. -static bool isShortenable(Instruction *I) { -  // Don't shorten stores for now -  if (isa<StoreInst>(I)) -    return false; -   -  IntrinsicInst *II = cast<IntrinsicInst>(I); -  switch (II->getIntrinsicID()) { -    default: return false; -    case Intrinsic::memset: -    case Intrinsic::memcpy: -      // Do shorten memory intrinsics. -      return true; -  } -} -  /// getStoredPointerOperand - Return the pointer that is being written to.  static Value *getStoredPointerOperand(Instruction *I) {    if (StoreInst *SI = dyn_cast<StoreInst>(I)) @@ -311,24 +293,11 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) {    return false;  } -namespace { -  enum OverwriteResult -  { -    OverwriteComplete, -    OverwriteEnd, -    OverwriteUnknown -  }; -} - -/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location +/// isCompleteOverwrite - Return true if a store to the 'Later' location  /// completely overwrites a store to the 'Earlier' location. -/// 'OverwriteEnd' if the end of the 'Earlier' location is completely  -/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined -static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, -                                   const AliasAnalysis::Location &Earlier, -                                   AliasAnalysis &AA, -                                   int64_t& EarlierOff, -                                   int64_t& LaterOff) { +static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, +                                const AliasAnalysis::Location &Earlier, +                                AliasAnalysis &AA) {    const Value *P1 = Earlier.Ptr->stripPointerCasts();    const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -342,24 +311,23 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,        // If we have no TargetData information around, then the size of the store        // is inferrable from the pointee type.  If they are the same type, then        // we know that the store is safe. -      if (AA.getTargetData() == 0 && -          Later.Ptr->getType() == Earlier.Ptr->getType()) -        return OverwriteComplete; -         -      return OverwriteUnknown; +      if (AA.getTargetData() == 0) +        return Later.Ptr->getType() == Earlier.Ptr->getType(); +      return false;      }      // Make sure that the Later size is >= the Earlier size. -    if (Later.Size >= Earlier.Size) -      return OverwriteComplete; +    if (Later.Size < Earlier.Size) +      return false; +    return true;    }    // Otherwise, we have to have size information, and the later store has to be    // larger than the earlier one.    if (Later.Size == AliasAnalysis::UnknownSize ||        Earlier.Size == AliasAnalysis::UnknownSize || -      AA.getTargetData() == 0) -    return OverwriteUnknown; +      Later.Size <= Earlier.Size || AA.getTargetData() == 0) +    return false;    // Check to see if the later store is to the entire object (either a global,    // an alloca, or a byval argument).  If so, then it clearly overwrites any @@ -372,27 +340,26 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,    // If we can't resolve the same pointers to the same object, then we can't    // analyze them at all.    if (UO1 != UO2) -    return OverwriteUnknown; +    return false;    // If the "Later" store is to a recognizable object, get its size.    if (isObjectPointerWithTrustworthySize(UO2)) {      uint64_t ObjectSize =        TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());      if (ObjectSize == Later.Size) -      return OverwriteComplete; +      return true;    }    // Okay, we have stores to two completely different pointers.  Try to    // decompose the pointer into a "base + constant_offset" form.  If the base    // pointers are equal, then we can reason about the two stores. -  EarlierOff = 0; -  LaterOff = 0; +  int64_t EarlierOff = 0, LaterOff = 0;    const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);    const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);    // If the base pointers still differ, we have two completely different stores.    if (BP1 != BP2) -    return OverwriteUnknown; +    return false;    // The later store completely overlaps the earlier store if:    // @@ -410,24 +377,11 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,    //    // We have to be careful here as *Off is signed while *.Size is unsigned.    if (EarlierOff >= LaterOff && -      Later.Size > Earlier.Size &&        uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) -    return OverwriteComplete; -   -  // The other interesting case is if the later store overwrites the end of -  // the earlier store -  // -  //      |--earlier--| -  //                |--   later   --| -  // -  // In this case we may want to trim the size of earlier to avoid generating -  // writes to addresses which will definitely be overwritten later -  if (LaterOff > EarlierOff && -      LaterOff + Later.Size >= EarlierOff + Earlier.Size) -    return OverwriteEnd; +    return true;    // Otherwise, they don't completely overlap. -  return OverwriteUnknown; +  return false;  }  /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -551,52 +505,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {        // If we find a write that is a) removable (i.e., non-volatile), b) is        // completely obliterated by the store to 'Loc', and c) which we know that        // 'Inst' doesn't load from, then we can remove it. -      if (isRemovable(DepWrite) &&  +      if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&            !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { -        int64_t InstWriteOffset, DepWriteOffset;  -        OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,  -                                         DepWriteOffset, InstWriteOffset);  -        if (OR == OverwriteComplete) { -          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " -                << *DepWrite << "\n  KILLER: " << *Inst << '\n'); - -          // Delete the store and now-dead instructions that feed it. -          DeleteDeadInstruction(DepWrite, *MD); -          ++NumFastStores; -          MadeChange = true; -           -          // DeleteDeadInstruction can delete the current instruction in loop -          // cases, reset BBI. -          BBI = Inst; -          if (BBI != BB.begin()) -            --BBI; -          break; -        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { -          // TODO: base this on the target vector size so that if the earlier -          // store was too small to get vector writes anyway then its likely -          // a good idea to shorten it -          // Power of 2 vector writes are probably always a bad idea to optimize -          // as any store/memset/memcpy is likely using vector instructions so -          // shortening it to not vector size is likely to be slower -          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite); -          unsigned DepWriteAlign = DepIntrinsic->getAlignment(); -          if (llvm::isPowerOf2_64(InstWriteOffset) || -              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { -             -            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: " -                  << *DepWrite << "\n  KILLER (offset "  -                  << InstWriteOffset << ", "  -                  << DepLoc.Size << ")" -                  << *Inst << '\n'); -             -            Value* DepWriteLength = DepIntrinsic->getLength(); -            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), -                                                    InstWriteOffset -  -                                                    DepWriteOffset); -            DepIntrinsic->setLength(TrimmedLength); -            MadeChange = true; -          } -        } +        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " +              << *DepWrite << "\n  KILLER: " << *Inst << '\n'); + +        // Delete the store and now-dead instructions that feed it. +        DeleteDeadInstruction(DepWrite, *MD); +        ++NumFastStores; +        MadeChange = true; + +        // DeleteDeadInstruction can delete the current instruction in loop +        // cases, reset BBI. +        BBI = Inst; +        if (BBI != BB.begin()) +          --BBI; +        break;        }        // If this is a may-aliased store that is clobbering the store value, we diff --git a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll deleted file mode 100644 index 828ccc57a44..00000000000 --- a/llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll +++ /dev/null @@ -1,78 +0,0 @@ -; RUN: opt < %s -basicaa -dse -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -%struct.vec2 = type { <4 x i32>, <4 x i32> } -%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 } - -@glob1 = global %struct.vec2 zeroinitializer, align 16 -@glob2 = global %struct.vec2plusi zeroinitializer, align 16 - -define void @write24to28(i32* nocapture %p) nounwind uwtable ssp { -; CHECK: @write24to28 -entry: -  %arrayidx0 = getelementptr inbounds i32* %p, i64 1 -  %p3 = bitcast i32* %arrayidx0 to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false) -  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) -  %arrayidx1 = getelementptr inbounds i32* %p, i64 7 -  store i32 1, i32* %arrayidx1, align 4 -  ret void -} - -define void @write28to32(i32* nocapture %p) nounwind uwtable ssp { -; CHECK: @write28to32 -entry: -  %p3 = bitcast i32* %p to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false) -  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false) -  %arrayidx1 = getelementptr inbounds i32* %p, i64 7 -  store i32 1, i32* %arrayidx1, align 4 -  ret void -} - -define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp { -; CHECK: @dontwrite28to32memset -entry: -  %p3 = bitcast i32* %p to i8* -; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false) -  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false) -  %arrayidx1 = getelementptr inbounds i32* %p, i64 7 -  store i32 1, i32* %arrayidx1, align 4 -  ret void -} - -define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp { -; CHECK: @write32to36 -entry: -  %0 = bitcast %struct.vec2plusi* %p to i8* -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false) -  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false) -  %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2 -  store i32 1, i32* %c, align 4 -  ret void -} - -define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK: @write16to32 -entry: -  %0 = bitcast %struct.vec2* %p to i8* -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false) -  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false) -  %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1 -  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4 -  ret void -} - -define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp { -; CHECK: @dontwrite28to32memcpy -entry: -  %0 = bitcast %struct.vec2* %p to i8* -; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false) -  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false) -  %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7 -  store i32 1, i32* %arrayidx1, align 4 -  ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind | 

