Diffstat (limited to 'llvm/lib')
 -rw-r--r--  llvm/lib/CodeGen/AtomicExpandPass.cpp   | 97
 -rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 59
 -rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h   |  5
3 files changed, 109 insertions, 52 deletions
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index d2ed07775bc..ed4924e1ba0 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -44,9 +44,6 @@ namespace {
     bool expandAtomicStore(StoreInst *LI);
     bool expandAtomicRMW(AtomicRMWInst *AI);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
-    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
-    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
   };
 }
@@ -98,20 +95,29 @@ bool AtomicExpand::runOnFunction(Function &F) {
 }
 
 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
-  // Load instructions don't actually need a leading fence, even in the
-  // SequentiallyConsistent case.
+  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+  // If getInsertFencesForAtomic() returns true, then the target does not want
+  // to deal with memory orders, and emitLeading/TrailingFence should take care
+  // of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
+  // should preserve the ordering.
   AtomicOrdering MemOpOrder =
-      TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
-          ? Monotonic
-          : LI->getOrdering();
+      TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+  IRBuilder<> Builder(LI);
 
-  // The only 64-bit load guaranteed to be single-copy atomic by the ARM is
+  // Note that although no fence is required before an atomic load on ARM, one
+  // is required before SequentiallyConsistent loads for the recommended Power
+  // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
+  // So we let the target choose what to emit.
+  TLI->emitLeadingFence(Builder, LI->getOrdering(),
+                        /*IsStore=*/false, /*IsLoad=*/true);
+
+  // The only 64-bit load guaranteed to be single-copy atomic by ARM is
   // an ldrexd (A3.5.3).
-  IRBuilder<> Builder(LI);
-  Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
-      Builder, LI->getPointerOperand(), MemOpOrder);
+  Value *Val =
+      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
 
-  insertTrailingFence(Builder, LI->getOrdering());
+  TLI->emitTrailingFence(Builder, LI->getOrdering(),
+                         /*IsStore=*/false, /*IsLoad=*/true);
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -134,11 +140,18 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
 }
 
 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
+  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
   AtomicOrdering Order = AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
+  // If getInsertFencesForAtomic() returns true, then the target does not want
+  // to deal with memory orders, and emitLeading/TrailingFence should take care
+  // of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
+  // should preserve the ordering.
+  AtomicOrdering MemOpOrder =
+      TLI->getInsertFencesForAtomic() ? Monotonic : Order;
 
   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
@@ -165,13 +178,12 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+  TLI->emitLeadingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
-      Builder, Addr, MemOpOrder);
+  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
 
   Value *NewVal;
   switch (AI->getOperation()) {
@@ -218,14 +230,13 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
   }
 
   Value *StoreSuccess =
-      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
-          Builder, NewVal, Addr, MemOpOrder);
+      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
   Value *TryAgain = Builder.CreateICmpNE(
       StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  insertTrailingFence(Builder, Order);
+  TLI->emitTrailingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
 
   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
@@ -234,12 +245,19 @@ bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
 }
 
 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
   AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
   AtomicOrdering FailureOrder = CI->getFailureOrdering();
   Value *Addr = CI->getPointerOperand();
   BasicBlock *BB = CI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
+  // If getInsertFencesForAtomic() returns true, then the target does not want
+  // to deal with memory orders, and emitLeading/TrailingFence should take care
+  // of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
+  // should preserve the ordering.
+  AtomicOrdering MemOpOrder =
+      TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
 
   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
   //
@@ -280,13 +298,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
+  TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+                        /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
-      Builder, Addr, MemOpOrder);
+  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
   Value *ShouldStore =
       Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
@@ -295,9 +313,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
 
   Builder.SetInsertPoint(TryStoreBB);
-  Value *StoreSuccess =
-      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
-          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+  Value *StoreSuccess = TLI->emitStoreConditional(
+      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   Builder.CreateCondBr(StoreSuccess, SuccessBB,
@@ -305,11 +322,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // Make sure later instructions don't get reordered with a fence if necessary.
   Builder.SetInsertPoint(SuccessBB);
-  insertTrailingFence(Builder, SuccessOrder);
+  TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+                         /*IsLoad=*/true);
   Builder.CreateBr(ExitBB);
 
   Builder.SetInsertPoint(FailureBB);
-  insertTrailingFence(Builder, FailureOrder);
+  TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+                         /*IsLoad=*/true);
   Builder.CreateBr(ExitBB);
 
   // Finally, we have control-flow based knowledge of whether the cmpxchg
@@ -358,27 +377,3 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   CI->eraseFromParent();
   return true;
 }
-
-AtomicOrdering AtomicExpand::insertLeadingFence(IRBuilder<> &Builder,
-                                                AtomicOrdering Ord) {
-  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
-    return Ord;
-
-  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
-    Builder.CreateFence(Release);
-
-  // The exclusive operations don't need any barrier if we're adding separate
-  // fences.
-  return Monotonic;
-}
-
-void AtomicExpand::insertTrailingFence(IRBuilder<> &Builder,
-                                       AtomicOrdering Ord) {
-  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
-    return;
-
-  if (Ord == Acquire || Ord == AcquireRelease)
-    Builder.CreateFence(Acquire);
-  else if (Ord == SequentiallyConsistent)
-    Builder.CreateFence(SequentiallyConsistent);
-}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 0e78204bbf8..aeed2b2dede 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2723,7 +2723,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
-  unsigned Domain = ARM_MB::ISH;
+  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
   if (Subtarget->isMClass()) {
     // Only a full system barrier exists in the M-class architectures.
     Domain = ARM_MB::SY;
@@ -10982,6 +10982,63 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
   return true;
 }
 
+static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+  Constant *CDomain = Builder.getInt32(Domain);
+  Builder.CreateCall(DMB, CDomain);
+}
+
+// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+                                         AtomicOrdering Ord, bool IsStore,
+                                         bool IsLoad) const {
+  if (!getInsertFencesForAtomic())
+    return;
+
+  switch (Ord) {
+  case NotAtomic:
+  case Unordered:
+    llvm_unreachable("Invalid fence: unordered/non-atomic");
+  case Monotonic:
+  case Acquire:
+    return; // Nothing to do
+  case SequentiallyConsistent:
+    if (!IsStore)
+      return; // Nothing to do
+    /*FALLTHROUGH*/
+  case Release:
+  case AcquireRelease:
+    if (Subtarget->isSwift())
+      makeDMB(Builder, ARM_MB::ISHST);
+    // FIXME: add a comment with a link to documentation justifying this.
+    else
+      makeDMB(Builder, ARM_MB::ISH);
+    return;
+  }
+}
+
+void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+                                          AtomicOrdering Ord, bool IsStore,
+                                          bool IsLoad) const {
+  if (!getInsertFencesForAtomic())
+    return;
+
+  switch (Ord) {
+  case NotAtomic:
+  case Unordered:
+    llvm_unreachable("Invalid fence: unordered/non-atomic");
+  case Monotonic:
+  case Release:
+    return; // Nothing to do
+  case Acquire:
+  case AcquireRelease:
+  case SequentiallyConsistent:
+    makeDMB(Builder, ARM_MB::ISH);
+    return;
+  }
+}
+
 bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
   // Loads and stores less than 64-bits are already atomic; ones above that
   // are doomed anyway, so defer to the default libcall and blame the OS when
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 3ad9f13d453..de91014a07c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -397,6 +397,11 @@ namespace llvm {
     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
                                 AtomicOrdering Ord) const override;
 
+    void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                          bool IsStore, bool IsLoad) const override;
+    void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                           bool IsStore, bool IsLoad) const override;
+
     bool shouldExpandAtomicInIR(Instruction *Inst) const override;
 
     bool useLoadStackGuardNode() const override;
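For reference, the barrier placement the two new ARM hooks implement, read directly off the switch statements above (cf. the cpp0xmappings page linked in the code):

  ordering         leading fence              trailing fence
  monotonic        none                       none
  acquire          none                       dmb ish
  release          dmb ish (ishst on Swift)   none
  acq_rel          dmb ish (ishst on Swift)   dmb ish
  seq_cst load     none                       dmb ish
  seq_cst store    dmb ish (ishst on Swift)   dmb ish

Both hooks are no-ops when getInsertFencesForAtomic() is false; in that case AtomicExpandPass passes the original ordering straight through to the load-linked/store-conditional intrinsics instead of downgrading it to Monotonic.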

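The new hooks give each target control over where fences go around the ll/sc loop. As a sketch only, a target that opts in via getInsertFencesForAtomic() and simply wants the behaviour of the removed generic helpers could override the hooks along these lines; MyTargetLowering is a hypothetical class for illustration, not part of this patch, and plain IR fences stand in for a real target's barrier intrinsics:

// Sketch: reproduce the removed target-independent fence insertion
// through the new emitLeading/TrailingFence hooks.
void MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                        AtomicOrdering Ord, bool IsStore,
                                        bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return;
  // A release barrier is needed before the exclusive operation for
  // Release, AcquireRelease and SequentiallyConsistent accesses.
  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    Builder.CreateFence(Release);
}

void MyTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                         AtomicOrdering Ord, bool IsStore,
                                         bool IsLoad) const {
  if (!getInsertFencesForAtomic())
    return;
  // An acquire barrier is needed after the operation for Acquire and
  // AcquireRelease; SequentiallyConsistent keeps a full fence.
  if (Ord == Acquire || Ord == AcquireRelease)
    Builder.CreateFence(Acquire);
  else if (Ord == SequentiallyConsistent)
    Builder.CreateFence(SequentiallyConsistent);
}

The IsStore/IsLoad flags are what make the finer-grained placement possible: ARM uses them to skip the leading barrier for SequentiallyConsistent loads, whereas the recommended Power mapping mentioned in the expandAtomicLoad comment would emit one there.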
