| author | Daniel Neilson <dneilson@azul.com> | 2018-03-21 14:14:55 +0000 |
|---|---|---|
| committer | Daniel Neilson <dneilson@azul.com> | 2018-03-21 14:14:55 +0000 |
| commit | 6f1eb58e92039eaf359a18dbea129e0da417b840 | |
| tree | f09e506d7ee4e9610f15dfafd6176af7268bc7e6 /llvm | |
| parent | 038cbc5c13e33052c1b7dad1112c2a062e7c565e | |
[MemCpyOpt] Update to new API for memory intrinsic alignment
Summary:
This change is part of step five in the series of changes to remove the alignment argument
from memcpy/memmove/memset in favour of alignment attributes. In particular, it changes the
MemCpyOpt pass to stop using:
1) the old getAlignment() API of MemIntrinsicInst, in favour of getting source- and
dest-specific alignments through the new API; and
2) the old IRBuilder CreateMemCpy/CreateMemMove single-alignment APIs, in favour of the new
API that allows setting the source and destination alignments independently.
We also add a few tests to fill gaps in the testing of this pass.
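For illustration only, here is a minimal sketch of the IRBuilder API change, assuming the post-rL323597 overload CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size, IsVolatile) that the diff below also uses; the wrapper emitCopy and its parameter names are hypothetical, not part of the patch:

```cpp
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Old single-alignment style (slated for removal in step 6): one value had
// to serve both pointers, so callers passed the conservative minimum:
//   Builder.CreateMemCpy(Dst, Src, Size, std::min(DstAlign, SrcAlign));
//
// New style used by this patch: each pointer carries its own alignment.
static CallInst *emitCopy(IRBuilder<> &Builder, Value *Dst, unsigned DstAlign,
                          Value *Src, unsigned SrcAlign, Value *Size) {
  return Builder.CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size,
                              /*isVolatile=*/false);
}
```

The point of the new overload is that a copy between, say, an 8-byte-aligned source and a 16-byte-aligned destination no longer has to throw away the stronger guarantee on one side.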
Steps:
Step 1) Remove alignment parameter and create alignment parameter attributes for
memcpy/memmove/memset. ( rL322965, rC322964, rL322963 )
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments. ( rL323597 )
Step 3) Update Clang to use the new IRBuilder API. ( rC323617 )
Step 4) Update Polly to use the new IRBuilder API. ( rL323618 )
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use MemIntrinsicInst::[get|set]Alignment() to use [get|set]DestAlignment()
and [get|set]SourceAlignment() instead. ( rL323886, rL323891, rL324148, rL324273, rL324278,
rL324384, rL324395, rL324402, rL324626, rL324642, rL324653, rL324654, rL324773, rL324774,
rL324781, rL324784, rL324955, rL324960, rL325816, rL327398, rL327421 )
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
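To make the step 5 accessor migration concrete, here is a minimal sketch (not from the patch; the helper name conservativeAlign is hypothetical) of how a pass can collapse the new per-pointer accessors back into one conservative value where a single alignment is still required, mirroring the performCallSlotOptzn change in the diff below:

```cpp
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Where a pass previously read MI->getAlignment() (one value covering both
// pointers), it now reads each side separately; MinAlign recovers the old
// conservative behaviour when one value must hold for both.
static unsigned conservativeAlign(const MemTransferInst *MI) {
  unsigned Align = MinAlign(MI->getDestAlignment(), MI->getSourceAlignment());
  return Align;
}
```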
References:
   http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
   http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
llvm-svn: 328097
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 60 |
|---|---|---|
| -rw-r--r-- | llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll | 28 |
| -rw-r--r-- | llvm/test/Transforms/MemCpyOpt/memcpy.ll | 14 |

3 files changed, 77 insertions, 25 deletions
```diff
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9c870b42a74..e2de0ee6bcf 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -263,7 +263,7 @@ public:
 
   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
     int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
-    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
+    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
   }
 
   void addRange(int64_t Start, int64_t Size, Value *Ptr,
@@ -498,16 +498,25 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
   return AMemSet;
 }
 
-static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
-                                     const LoadInst *LI) {
+static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
   unsigned StoreAlign = SI->getAlignment();
   if (!StoreAlign)
     StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
+  return StoreAlign;
+}
+
+static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
   unsigned LoadAlign = LI->getAlignment();
   if (!LoadAlign)
     LoadAlign = DL.getABITypeAlignment(LI->getType());
+  return LoadAlign;
+}
 
-  return std::min(StoreAlign, LoadAlign);
+static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
+                                     const LoadInst *LI) {
+  unsigned StoreAlign = findStoreAlignment(DL, SI);
+  unsigned LoadAlign = findLoadAlignment(DL, LI);
+  return MinAlign(StoreAlign, LoadAlign);
 }
 
 // This method try to lift a store instruction before position P.
@@ -656,19 +665,20 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
 
           if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
             UseMemMove = true;
 
-          unsigned Align = findCommonAlignment(DL, SI, LI);
           uint64_t Size = DL.getTypeStoreSize(T);
           IRBuilder<> Builder(P);
           Instruction *M;
           if (UseMemMove)
-            M = Builder.CreateMemMove(SI->getPointerOperand(),
-                                      LI->getPointerOperand(), Size,
-                                      Align, SI->isVolatile());
+            M = Builder.CreateMemMove(
+                SI->getPointerOperand(), findStoreAlignment(DL, SI),
+                LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
+                SI->isVolatile());
           else
-            M = Builder.CreateMemCpy(SI->getPointerOperand(),
-                                     LI->getPointerOperand(), Size,
-                                     Align, SI->isVolatile());
+            M = Builder.CreateMemCpy(
+                SI->getPointerOperand(), findStoreAlignment(DL, SI),
+                LI->getPointerOperand(), findLoadAlignment(DL, LI), Size,
+                SI->isVolatile());
 
           DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
                        << " => " << *M << "\n");
@@ -1047,20 +1057,17 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
 
   // If all checks passed, then we can transform M.
 
-  // Make sure to use the lesser of the alignment of the source and the dest
-  // since we're changing where we're reading from, but don't want to increase
-  // the alignment past what can be read from or written to.
   // TODO: Is this worth it if we're creating a less aligned memcpy? For
   // example we could be moving from movaps -> movq on x86.
-  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
-
   IRBuilder<> Builder(M);
   if (UseMemMove)
-    Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
-                          Align, M->isVolatile());
+    Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(),
+                          MDep->getRawSource(), MDep->getSourceAlignment(),
+                          M->getLength(), M->isVolatile());
   else
-    Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
-                         Align, M->isVolatile());
+    Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(),
+                         MDep->getRawSource(), MDep->getSourceAlignment(),
+                         M->getLength(), M->isVolatile());
 
   // Remove the instruction we're replacing.
   MD->removeInstruction(M);
@@ -1106,7 +1113,7 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
   // If Dest is aligned, and SrcSize is constant, use the minimum alignment
   // of the sum.
   const unsigned DestAlign =
-      std::max(MemSet->getAlignment(), MemCpy->getAlignment());
+      std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
   if (DestAlign > 1)
     if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
       Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
@@ -1166,7 +1173,7 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
 
   IRBuilder<> Builder(MemCpy);
   Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
-                       CopySize, MemCpy->getAlignment());
+                       CopySize, MemCpy->getDestAlignment());
 
   return true;
 }
@@ -1192,7 +1199,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
       if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
         IRBuilder<> Builder(M);
         Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
-                             M->getAlignment(), false);
+                             M->getDestAlignment(), false);
         MD->removeInstruction(M);
         M->eraseFromParent();
         ++NumCpyToSet;
@@ -1221,8 +1228,11 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
   //   d) memcpy from a just-memset'd source can be turned into memset.
   if (DepInfo.isClobber()) {
     if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+      // FIXME: Can we pass in either of dest/src alignment here instead
+      // of conservatively taking the minimum?
+      unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment());
       if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
-                               CopySize->getZExtValue(), M->getAlignment(),
+                               CopySize->getZExtValue(), Align,
                                C)) {
         MD->removeInstruction(M);
         M->eraseFromParent();
@@ -1337,7 +1347,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
   // source of the memcpy to the alignment we need.  If we fail, we bail out.
   AssumptionCache &AC = LookupAssumptionCache();
   DominatorTree &DT = LookupDomTree();
-  if (MDep->getAlignment() < ByValAlign &&
+  if (MDep->getSourceAlignment() < ByValAlign &&
       getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
                                  CS.getInstruction(), &AC, &DT) < ByValAlign)
     return false;
diff --git a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll
index f6b175e4171..9dbba093fe2 100644
--- a/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll
@@ -3,6 +3,34 @@
 
 %T = type { i8, i32 }
 
+; Ensure load-store forwarding of an aggregate is interpreted as
+; a memmove when the source and dest may alias
+define void @test_memmove(%T* align 8 %a, %T* align 16 %b) {
+; CHECK-LABEL: @test_memmove(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %val = load %T, %T* %a, align 8
+  store %T %val, %T* %b, align 16
+  ret void
+}
+
+; Ensure load-store forwarding of an aggregate is interpreted as
+; a memcpy when the source and dest do not alias
+define void @test_memcpy(%T* noalias align 8 %a, %T* noalias align 16 %b) {
+; CHECK-LABEL: @test_memcpy(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %T* [[B:%.*]] to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %T* [[A:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP1]], i8* align 8 [[TMP2]], i64 8, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %val = load %T, %T* %a, align 8
+  store %T %val, %T* %b, align 16
+  ret void
+}
+
 ; memcpy(%d, %a) should not be generated since store2 may-aliases load %a.
 define void @f(%T* %a, %T* %b, %T* %c, %T* %d) {
 ; CHECK-LABEL: @f(
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index 7e1e4d54a24..4c5f6cbeb1a 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -46,6 +46,20 @@ define void @test2(i8* %P, i8* %Q) nounwind  {
 ; CHECK-NEXT: ret void
 }
 
+; The intermediate alloca and one of the memcpy's should be eliminated, the
+; other should be related with a memcpy.
+define void @test2_memcpy(i8* noalias %P, i8* noalias %Q) nounwind  {
+  %memtmp = alloca %0, align 16
+  %R = bitcast %0* %memtmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %R, i8* align 16 %P, i32 32, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %Q, i8* align 16 %R, i32 32, i1 false)
+  ret void
+
+; CHECK-LABEL: @test2_memcpy(
+; CHECK-NEXT: call void @llvm.memcpy{{.*}}(i8* align 16 %Q, i8* align 16 %P
+; CHECK-NEXT: ret void
+}
+
```
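One subtlety in the findCommonAlignment hunk above: the combined alignment is now computed with MinAlign rather than std::min. A standalone sketch of the difference, assuming MinAlign's MathExtras.h behaviour (the largest power of two dividing both inputs):

```cpp
#include "llvm/Support/MathExtras.h"

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // For power-of-two inputs, MinAlign agrees with std::min:
  assert(llvm::MinAlign(8, 16) == 8);
  assert(std::min<uint64_t>(8, 16) == 8);
  // For a non-power-of-two input, only 4-byte alignment is actually
  // guaranteed by both 12 and 8, so MinAlign is (correctly) stricter:
  assert(llvm::MinAlign(12, 8) == 4);
  return 0;
}
```

For the power-of-two alignments that normally reach this code the two agree; MinAlign is simply the safe choice for arbitrary values.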

