diff options
author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-09-22 17:21:44 +0000 |
---|---|---|
committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-09-22 17:21:44 +0000 |
commit | 07a844d758f09e1dd07ca17eb38a22d69d63fae3 (patch) | |
tree | 4b7b38a24d18b1e32cd081cc7880fd62b6fc7cef /llvm/lib/CodeGen | |
parent | d13c4fb7f202dc19e8c127428ddfe941f944b729 (diff) | |
download | bcm5719-llvm-07a844d758f09e1dd07ca17eb38a22d69d63fae3.tar.gz bcm5719-llvm-07a844d758f09e1dd07ca17eb38a22d69d63fae3.zip |
[AArch64] Emit clrex in the expanded cmpxchg fail block.
In the comparison failure block of a cmpxchg expansion, the initial
ldrex/ldxr will not be followed by a matching strex/stxr.
On ARM/AArch64, this unnecessarily ties up the exclusive monitor,
which might have a negative performance impact on some uarchs.
Instead, release the monitor in the failure block.
The clrex instruction was designed for this: use it.
Also see ARMARM v8-A B2.10.2:
"Exclusive access instructions and Shareable memory locations".
Differential Revision: http://reviews.llvm.org/D13033
llvm-svn: 248291
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/AtomicExpandPass.cpp | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 73102ccfece..0e5b62a3e34 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -374,7 +374,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure + // label %cmpxchg.nostore // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 @@ -382,6 +382,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // cmpxchg.success: // fence? // br label %cmpxchg.end + // cmpxchg.nostore: + // @load_linked_fail_balance()? + // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end @@ -392,7 +395,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // [...] BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); @@ -416,7 +420,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). 
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( @@ -432,6 +436,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { /*IsLoad=*/true); Builder.CreateBr(ExitBB); + Builder.SetInsertPoint(NoStoreBB); + // In the failing case, where we don't execute the store-conditional, the + // target might want to balance out the load-linked with a dedicated + // instruction (e.g., on ARM, clearing the exclusive monitor). + TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + Builder.CreateBr(FailureBB); + Builder.SetInsertPoint(FailureBB); TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, /*IsLoad=*/true); |