| author | Tim Northover <tnorthover@apple.com> | 2014-05-30 10:09:59 +0000 |
|---|---|---|
| committer | Tim Northover <tnorthover@apple.com> | 2014-05-30 10:09:59 +0000 |
| commit | b4ddc0845ab5260023e9afa3f7bc71a0bc731ae6 (patch) | |
| tree | 37a8539f9db4739dd57e7bdf39433594e822e2ec /llvm/lib | |
| parent | 5070c18928e4b4855202ee327f537c7a1969051e (diff) | |
| download | bcm5719-llvm-b4ddc0845ab5260023e9afa3f7bc71a0bc731ae6.tar.gz bcm5719-llvm-b4ddc0845ab5260023e9afa3f7bc71a0bc731ae6.zip | |
ARM & AArch64: make use of common cmpxchg idioms after expansion
The C and C++ semantics for compare_exchange require it to return a bool
indicating success. This gets mapped to LLVM IR in which each cmpxchg is
followed by an icmp of the loaded value against the expected value.
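For illustration only, roughly the pattern being described, in the era's single-result cmpxchg form (function and value names here are invented, not taken from this commit):

```llvm
; Sketch of: bool ok = x.compare_exchange_strong(expected, desired);
; The cmpxchg yields the value it loaded; the front end then materialises
; the bool by comparing that value against the expected one.
define i1 @cas_sketch(i32* %ptr, i32 %expected, i32 %desired) {
  %loaded = cmpxchg i32* %ptr, i32 %expected, i32 %desired seq_cst seq_cst
  %success = icmp eq i32 %loaded, %expected
  ret i1 %success
}
```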
When lowered to ldxr/stxr loops, this extra comparison is redundant: its
result is already implicit in the control flow of the function.
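To see why, here is a minimal sketch of the expanded loop on AArch64. Block and value names are invented, and the intrinsic declarations reflect my understanding of the exclusive-access intrinsics rather than code from this commit:

```llvm
define i32 @cas_loop_sketch(i32* %ptr, i32 %expected, i32 %new) {
entry:
  br label %loop

loop:                ; load-exclusive, then compare against %expected
  %lval = call i64 @llvm.aarch64.ldxr.p0i32(i32* %ptr)
  %loaded = trunc i64 %lval to i32
  %should_store = icmp eq i32 %loaded, %expected
  br i1 %should_store, label %trystore, label %done

trystore:            ; store-exclusive; retry if the reservation was lost
  %nval = zext i32 %new to i64
  %stxr_fail = call i32 @llvm.aarch64.stxr.p0i32(i64 %nval, i32* %ptr)
  %tryagain = icmp ne i32 %stxr_fail, 0
  br i1 %tryagain, label %loop, label %done

done:                ; a later "icmp eq i32 %loaded, %expected" here is
                     ; redundant: which edge reached this block already
                     ; tells us whether the exchange succeeded
  ret i32 %loaded
}

declare i64 @llvm.aarch64.ldxr.p0i32(i32*)
declare i32 @llvm.aarch64.stxr.p0i32(i64, i32*)
```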
This commit makes two changes: it replaces that icmp with appropriate PHI
nodes, and then makes sure SimplifyCFG is run after expansion to actually make
use of the opportunities revealed.
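In isolation, the PHI half of that looks something like the following toy sketch (block names invented; the real pass builds the PHI in the failure block of the loop shown above):

```llvm
; The icmp that used to re-check %loaded against the expected value becomes
; a PHI encoding which edge was taken: true from the successful-store path,
; false from the compare-failed exit of the loop.
define i1 @flow_derived_success(i1 %cmp_ok) {
entry:
  br i1 %cmp_ok, label %trystore, label %exit

trystore:
  br label %exit

exit:
  %success = phi i1 [ true, %trystore ], [ false, %entry ]
  ret i1 %success
}
```

SimplifyCFG can then fold branches on %success directly into the loop's existing control flow.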
I've also added -{arm,aarch64}-atomic-cfg-tidy options, so that
existing fragile tests aren't perturbed too much by the change. Many
of them either rely on undef/unreachable too pervasively to be
restored to something well-defined (particularly while making sure
they test the same obscure assert from many years ago), or depend on a
particular CFG shape, which is disrupted by SimplifyCFG.
rdar://problem/16227836
llvm-svn: 209883
Diffstat (limited to 'llvm/lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp | 42 |
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 25 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMTargetMachine.cpp | 22 |

3 files changed, 81 insertions, 8 deletions
```diff
diff --git a/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
index d995333971b..d6d9907be7e 100644
--- a/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
@@ -300,12 +300,50 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
 
-  // Finally, make sure later instructions don't get reordered with a fence if
-  // necessary.
+  // Make sure later instructions don't get reordered with a fence if necessary.
   Builder.SetInsertPoint(BarrierBB);
   insertTrailingFence(Builder, SuccessOrder);
   Builder.CreateBr(ExitBB);
 
+  // Finally, we have control-flow based knowledge of whether the cmpxchg
+  // succeeded or not. We expose this to later passes by converting any
+  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+
+  // Setup the builder so we can create any PHIs we need.
+  Builder.SetInsertPoint(FailureBB, FailureBB->begin());
+  BasicBlock *SuccessBB = FailureOrder == Monotonic ? BarrierBB : TryStoreBB;
+  PHINode *Success = 0, *Failure = 0;
+
+  // Look for any users of the cmpxchg that are just comparing the loaded value
+  // against the desired one, and replace them with the CFG-derived version.
+  for (auto User : CI->users()) {
+    ICmpInst *ICmp = dyn_cast<ICmpInst>(User);
+    if (!ICmp)
+      continue;
+
+    // Because we know ICmp uses CI, we only need one operand to be the old
+    // value.
+    if (ICmp->getOperand(0) != CI->getCompareOperand() &&
+        ICmp->getOperand(1) != CI->getCompareOperand())
+      continue;
+
+    if (ICmp->getPredicate() == CmpInst::ICMP_EQ) {
+      if (!Success) {
+        Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+        Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+        Success->addIncoming(ConstantInt::getFalse(Ctx), LoopBB);
+      }
+      ICmp->replaceAllUsesWith(Success);
+    } else if (ICmp->getPredicate() == CmpInst::ICMP_NE) {
+      if (!Failure) {
+        Failure = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+        Failure->addIncoming(ConstantInt::getFalse(Ctx), SuccessBB);
+        Failure->addIncoming(ConstantInt::getTrue(Ctx), LoopBB);
+      }
+      ICmp->replaceAllUsesWith(Failure);
+    }
+  }
+
   CI->replaceAllUsesWith(Loaded);
   CI->eraseFromParent();
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 0b5dd2f067e..ba301e95538 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -53,6 +53,12 @@ static cl::opt<bool> EnableLoadStoreOpt("aarch64-load-store-opt",
                                         cl::desc("Enable the load/store pair"
                                                  " optimization pass"),
                                         cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
+                 cl::desc("Run SimplifyCFG after expanding atomic operations"
+                          " to make use of cmpxchg flow-based information"),
+                 cl::init(true));
+
 extern "C" void LLVMInitializeAArch64Target() {
   // Register the target.
   RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -113,6 +119,7 @@ public:
     return getTM<AArch64TargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
@@ -135,6 +142,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
   return new AArch64PassConfig(this, PM);
 }
 
+void AArch64PassConfig::addIRPasses() {
+  // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
+  // ourselves.
+  addPass(createAtomicExpandLoadLinkedPass(TM));
+
+  // Cmpxchg instructions are often used with a subsequent comparison to
+  // determine whether it succeeded. We can exploit existing control-flow in
+  // ldrex/strex loops to simplify this, but it needs tidying up.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+    addPass(createCFGSimplificationPass());
+
+  TargetPassConfig::addIRPasses();
+}
+
 // Pass Pipeline Configuration
 bool AArch64PassConfig::addPreISel() {
   // Run promote constant before global merge, so that the promoted constants
@@ -146,10 +167,6 @@ bool AArch64PassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createAArch64AddressTypePromotionPass());
 
-  // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
-  // ourselves.
-  addPass(createAtomicExpandLoadLinkedPass(TM));
-
   return false;
 }
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 887622705ed..6ef2ea4e103 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
                          cl::desc("Inhibit optimization of S->D register accesses on A15"),
                          cl::init(false));
 
+static cl::opt<bool>
+EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
+                 cl::desc("Run SimplifyCFG after expanding atomic operations"
+                          " to make use of cmpxchg flow-based information"),
+                 cl::init(true));
+
 extern "C" void LLVMInitializeARMTarget() {
   // Register the target.
   RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -213,6 +219,7 @@ public:
     return *getARMTargetMachine().getSubtargetImpl();
   }
 
+  void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addPreRegAlloc() override;
@@ -225,11 +232,22 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new ARMPassConfig(this, PM);
 }
 
-bool ARMPassConfig::addPreISel() {
+void ARMPassConfig::addIRPasses() {
   const ARMSubtarget *Subtarget = &getARMSubtarget();
-  if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
+  if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
     addPass(createAtomicExpandLoadLinkedPass(TM));
 
+    // Cmpxchg instructions are often used with a subsequent comparison to
+    // determine whether it succeeded. We can exploit existing control-flow in
+    // ldrex/strex loops to simplify this, but it needs tidying up.
+    if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+      addPass(createCFGSimplificationPass());
+  }
+
+  TargetPassConfig::addIRPasses();
+}
+
+bool ARMPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None)
     addPass(createGlobalMergePass(TM));
```
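Since the new cl::opts are ordinary backend flags, the tidy-up can be disabled from llc when reducing or regenerating tests; a hypothetical invocation (input file name invented):

```sh
llc -mtriple=aarch64 -aarch64-atomic-cfg-tidy=false cmpxchg.ll
```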

