author     Robin Morisset <morisset@google.com>    2014-09-23 20:31:14 +0000
committer  Robin Morisset <morisset@google.com>    2014-09-23 20:31:14 +0000
commit     dedef3325f08e506e5b07d167579f39f817015f6 (patch)
tree       a22199bb6f83f3ed91900d33bd0ea1cc52a14c51 /llvm/lib/CodeGen
parent     da016026474e35614be37cc3ac205e82e0fcdf81 (diff)
Add AtomicExpandPass::bracketInstWithFences, and use it whenever getInsertFencesForAtomic would trigger in SelectionDAGBuilder
Summary:
The goal is to eventually remove all the code related to getInsertFencesForAtomic
in SelectionDAGBuilder, as it is wrong (designed for ARM, not really portable, and works
mostly by accident because the backends are overly conservative), and it duplicates the
logic that already lives in emitLeading/TrailingFence.
In this patch, I make AtomicExpandPass insert the fences itself, as it knows better
where to put them. Because this requires getting hold of the fence instructions that
are created, and not just passing an IRBuilder around, I had to change the return type of
emitLeading/TrailingFence.
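As a rough illustration of that interface change (a sketch only: the exact TargetLowering declarations and the per-target fence choices are assumptions inferred from how bracketInstWithFences uses the return values in the diff below, and the ExampleFenceHooks type is hypothetical), a backend hook pair under the new signature could look like:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical stand-in for the target hooks; the real ones live on
// TargetLowering. The important point is the return type: instead of
// returning void, each hook hands back the fence it created (or nullptr
// when no fence is needed) so AtomicExpandPass can reposition it.
struct ExampleFenceHooks {
  Instruction *emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                bool IsStore, bool IsLoad) const {
    if (IsStore && isAtLeastRelease(Ord))
      return Builder.CreateFence(Release); // fence before the access
    return nullptr;                        // no leading fence required
  }
  Instruction *emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                 bool IsStore, bool IsLoad) const {
    if (IsLoad && isAtLeastAcquire(Ord))
      return Builder.CreateFence(Acquire); // fence after the access
    return nullptr;
  }
};
```

Returning the created fence (or nullptr) is what lets bracketInstWithFences move the trailing fence to after the atomic instruction.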
This code only triggers on ARM for now. Because AtomicExpandPass runs earlier in the
pipeline than SelectionDAGBuilder, it fires first and lowers atomic accesses to
monotonic accesses bracketed by explicit fences, so SelectionDAGBuilder does not add
barriers anymore on ARM.
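The bracketing itself is small. Here is a minimal self-contained sketch of the idea, with assumptions for brevity: it hardcodes seq_cst fences and handles only loads, whereas the patch asks the target via emitLeading/TrailingFence and covers all atomic instruction kinds.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative sketch, not the patch itself: surround an atomic load with
// explicit fences and relax the load to monotonic, so later stages no longer
// need to add barriers for it.
static void bracketLoadWithFences(LoadInst *LI) {
  IRBuilder<> Builder(LI);                      // insertion point: just before LI
  Builder.CreateFence(SequentiallyConsistent);  // leading fence

  // There is no easy way to point the builder *after* an instruction, so the
  // trailing fence is created before LI and then moved, exactly like
  // bracketInstWithFences does in the diff below.
  FenceInst *Trailing = Builder.CreateFence(SequentiallyConsistent);
  Trailing->removeFromParent();
  Trailing->insertAfter(LI);

  LI->setOrdering(Monotonic);  // ordering is now carried by the fences
}
```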
If this patch is accepted, I plan to implement emitLeading/TrailingFence for all
backends that call setInsertFencesForAtomic(true), which will allow both making them
less conservative and simplifying SelectionDAGBuilder once they are all using
this interface.
This should not cause any functional change, so the existing tests are reused
and not modified.
Test Plan: make check-all, benefits from existing tests of atomics on ARM
Reviewers: jfb, t.p.northover
Subscribers: aemerson, llvm-commits
Differential Revision: http://reviews.llvm.org/D5179
llvm-svn: 218329
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--   llvm/lib/CodeGen/AtomicExpandPass.cpp | 108
1 file changed, 70 insertions(+), 38 deletions(-)
```diff
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 91fa66be952..3225731c05c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
+// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
 //
 //===----------------------------------------------------------------------===//
 
@@ -41,6 +41,8 @@ namespace {
     bool runOnFunction(Function &F) override;
 
   private:
+    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+                               bool IsStore, bool IsLoad);
     bool expandAtomicLoad(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
     bool expandAtomicRMW(AtomicRMWInst *AI);
@@ -80,10 +82,45 @@ bool AtomicExpand::runOnFunction(Function &F) {
     auto SI = dyn_cast<StoreInst>(I);
     auto RMWI = dyn_cast<AtomicRMWInst>(I);
     auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
-    assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
-           "Unknown atomic instruction");
+
+    auto FenceOrdering = Monotonic;
+    bool IsStore, IsLoad;
+    if (TargetLowering->getInsertFencesForAtomic()) {
+      if (LI && isAtLeastAcquire(LI->getOrdering())) {
+        FenceOrdering = LI->getOrdering();
+        LI->setOrdering(Monotonic);
+        IsStore = false;
+        IsLoad = true;
+      } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+        FenceOrdering = SI->getOrdering();
+        SI->setOrdering(Monotonic);
+        IsStore = true;
+        IsLoad = false;
+      } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
+                          isAtLeastAcquire(RMWI->getOrdering()))) {
+        FenceOrdering = RMWI->getOrdering();
+        RMWI->setOrdering(Monotonic);
+        IsStore = IsLoad = true;
+      } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
+                 (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+                  isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+        // If a compare and swap is lowered to LL/SC, we can do smarter fence
+        // insertion, with a stronger one on the success path than on the
+        // failure path. As a result, fence insertion is directly done by
+        // expandAtomicCmpXchg in that case.
+        FenceOrdering = CASI->getSuccessOrdering();
+        CASI->setSuccessOrdering(Monotonic);
+        CASI->setFailureOrdering(Monotonic);
+        IsStore = IsLoad = true;
+      }
+
+      if (FenceOrdering != Monotonic) {
+        MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+      }
+    }
+
     if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
       MadeChange |= expandAtomicLoad(LI);
     } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
@@ -97,30 +134,40 @@ bool AtomicExpand::runOnFunction(Function &F) {
   return MadeChange;
 }
 
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+                                         bool IsStore, bool IsLoad) {
+  IRBuilder<> Builder(I);
+
+  auto LeadingFence =
+      TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
+          Builder, Order, IsStore, IsLoad);
+
+  auto TrailingFence =
+      TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
+          Builder, Order, IsStore, IsLoad);
+  // The trailing fence is emitted before the instruction instead of after
+  // because there is no easy way of setting Builder insertion point after
+  // an instruction. So we must erase it from the BB, and insert it back
+  // in the right place.
+  // We have a guard here because not every atomic operation generates a
+  // trailing fence.
+  if (TrailingFence) {
+    TrailingFence->removeFromParent();
+    TrailingFence->insertAfter(I);
+  }
+
+  return (LeadingFence || TrailingFence);
+}
+
 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-  // If getInsertFencesForAtomic() returns true, then the target does not want
-  // to deal with memory orders, and emitLeading/TrailingFence should take care
-  // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
-  // should preserve the ordering.
-  AtomicOrdering MemOpOrder =
-      TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
   IRBuilder<> Builder(LI);
 
-  // Note that although no fence is required before atomic load on ARM, it is
-  // required before SequentiallyConsistent loads for the recommended Power
-  // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
-  // So we let the target choose what to emit.
-  TLI->emitLeadingFence(Builder, LI->getOrdering(),
-                        /*IsStore=*/false, /*IsLoad=*/true);
-
-  // The only 64-bit load guaranteed to be single-copy atomic by ARM is
-  // an ldrexd (A3.5.3).
+  // On some architectures, load-linked instructions are atomic for larger
+  // sizes than normal loads. For example, the only 64-bit load guaranteed
+  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val =
-      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
-
-  TLI->emitTrailingFence(Builder, LI->getOrdering(),
-                         /*IsStore=*/false, /*IsLoad=*/true);
+      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -193,17 +240,11 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
 
 bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
-  AtomicOrdering FenceOrder = AI->getOrdering();
+  AtomicOrdering MemOpOrder = AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
-  // If getInsertFencesForAtomic() returns true, then the target does not want
-  // to deal with memory orders, and emitLeading/TrailingFence should take care
-  // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
-  // should preserve the ordering.
-  AtomicOrdering MemOpOrder =
-      TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
 
   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
@@ -230,7 +271,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
@@ -247,7 +287,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
 
   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
@@ -256,11 +295,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
 }
 
 bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
-  auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
-  AtomicOrdering FenceOrder =
-      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
   AtomicOrdering MemOpOrder =
-      TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
+      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
@@ -292,8 +328,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TargetLowering->emitLeadingFence(Builder, FenceOrder,
-                                   /*IsStore=*/true, /*IsLoad=*/true);
   LoadInst *InitLoaded = Builder.CreateLoad(Addr);
   // Atomics require at least natural alignment.
   InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
@@ -317,8 +351,6 @@ bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
   Builder.CreateCondBr(Success, ExitBB, LoopBB);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  TargetLowering->emitTrailingFence(Builder, FenceOrder,
-                                    /*IsStore=*/true, /*IsLoad=*/true);
 
   AI->replaceAllUsesWith(NewLoaded);
   AI->eraseFromParent();
```