diff options
-rw-r--r-- | llvm/lib/CodeGen/AtomicExpandPass.cpp | 123 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/atomic-64bit.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/atomic-op.ll | 26 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/cmpxchg-idioms.ll | 65 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/cmpxchg-weak.ll | 6 | ||||
-rw-r--r-- | llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll | 86 | ||||
-rw-r--r-- | llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll | 53 |
7 files changed, 293 insertions, 84 deletions
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 457e7b38560..6ac8a8381bf 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -521,30 +521,62 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { AtomicOrdering MemOpOrder = TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder; + // In implementations which use a barrier to achieve release semantics, we can + // delay emitting this barrier until we know a store is actually going to be + // attempted. The cost of this delay is that we need 2 copies of the block + // emitting the load-linked, affecting code size. + // + // Ideally, this logic would be unconditional except for the minsize check + // since in other cases the extra blocks naturally collapse down to the + // minimal loop. Unfortunately, this puts too much stress on later + // optimisations so we avoid emitting the extra logic in those cases too. + bool HasReleasedLoadBB = !CI->isWeak() && TLI->getInsertFencesForAtomic() && + SuccessOrder != Monotonic && + SuccessOrder != Acquire && !F->optForMinSize(); + + // There's no overhead for sinking the release barrier in a weak cmpxchg, so + // do it even on minsize. + bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak(); + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord // // The full expansion we produce is: // [...] - // fence? // cmpxchg.start: - // %loaded = @load.linked(%addr) - // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, + // %unreleasedload = @load.linked(%addr) + // %should_store = icmp eq %unreleasedload, %desired + // br i1 %should_store, label %cmpxchg.fencedstore, // label %cmpxchg.nostore + // cmpxchg.fencedstore: + // fence? 
+ // br label %cmpxchg.trystore // cmpxchg.trystore: + // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore], + // [%releasedload, %cmpxchg.releasedload] // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 - // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // br i1 %success, label %cmpxchg.success, + // label %cmpxchg.releasedload/%cmpxchg.failure + // cmpxchg.releasedload: + // %releasedload = @load.linked(%addr) + // %should_store = icmp eq %releasedload, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.nostore // cmpxchg.success: // fence? // br label %cmpxchg.end // cmpxchg.nostore: + // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start], + // [%releasedload, + // %cmpxchg.releasedload/%cmpxchg.trystore] // @load_linked_fail_balance()? // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end // cmpxchg.end: + // %loaded = phi [%loaded.nostore, %cmpxchg.failure], + // [%loaded.trystore, %cmpxchg.success] // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 @@ -553,8 +585,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); - auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + auto ReleasedLoadBB = + BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB); + auto TryStoreBB = + BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB); + auto ReleasingStoreBB = + BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB); + auto StartBB = BasicBlock::Create(Ctx, 
"cmpxchg.start", F, ReleasingStoreBB); // This grabs the DebugLoc from CI IRBuilder<> Builder(CI); @@ -564,29 +601,51 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); - Builder.CreateBr(LoopBB); + if (UseUnconditionalReleaseBarrier) + TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + Builder.SetInsertPoint(StartBB); + Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = Builder.CreateICmpEQ( + UnreleasedLoad, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); + + Builder.SetInsertPoint(ReleasingStoreBB); + if (!UseUnconditionalReleaseBarrier) + TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB; Builder.CreateCondBr(StoreSuccess, SuccessBB, - CI->isWeak() ? FailureBB : LoopBB); - - // Make sure later instructions don't get reordered with a fence if necessary. + CI->isWeak() ? 
FailureBB : RetryBB); + + Builder.SetInsertPoint(ReleasedLoadBB); + Value *SecondLoad; + if (HasReleasedLoadBB) { + SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(), + "should_store"); + + // If the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + } else + Builder.CreateUnreachable(); + + // Make sure later instructions don't get reordered with a fence if + // necessary. Builder.SetInsertPoint(SuccessBB); TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, /*IsLoad=*/true); @@ -606,14 +665,36 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Finally, we have control-flow based knowledge of whether the cmpxchg // succeeded or not. We expose this to later passes by converting any - // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. - - // Setup the builder so we can create any PHIs we need. + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate + // PHI. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + // Setup the builder so we can create any PHIs we need. 
+ Value *Loaded; + if (!HasReleasedLoadBB) + Loaded = UnreleasedLoad; + else { + Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin()); + PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); + TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB); + TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); + + Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin()); + PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); + NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB); + NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB); + + Builder.SetInsertPoint(ExitBB, ++ExitBB->begin()); + PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2); + ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB); + ExitLoaded->addIncoming(NoStoreLoaded, FailureBB); + + Loaded = ExitLoaded; + } + // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. 
SmallVector<ExtractValueInst *, 2> PrunedInsts; diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll index 573cd45c082..a188a954f9d 100644 --- a/llvm/test/CodeGen/ARM/atomic-64bit.ll +++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll @@ -172,31 +172,31 @@ define i64 @test6(i64* %ptr, i64 %val) { define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { ; CHECK-LABEL: test7: ; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1 -; CHECK-DAG: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] -; CHECK-LE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], [[VAL1LO]] -; CHECK-LE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2 -; CHECK-BE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2 -; CHECK-BE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1 +; CHECK-LE-DAG: eor [[MISMATCH_LO:.*]], [[REG1]], [[VAL1LO]] +; CHECK-LE-DAG: eor [[MISMATCH_HI:.*]], [[REG2]], r2 +; CHECK-BE-DAG: eor [[MISMATCH_LO:.*]], [[REG2]], r2 +; CHECK-BE-DAG: eor [[MISMATCH_HI:.*]], [[REG1]], r1 ; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK: bne +; CHECK-DAG: dmb {{ish$}} ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} ; CHECK: cmp -; CHECK: bne +; CHECK: beq ; CHECK: dmb {{ish$}} ; CHECK-THUMB-LABEL: test7: -; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2 ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3 ; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2 ; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3 -; CHECK-THUMB-LE: orrs [[MISMATCH_HI]], [[MISMATCH_LO]] +; CHECK-THUMB-LE: orrs.w {{.*}}, [[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK-THUMB: bne +; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} ; CHECK-THUMB: cmp -; CHECK-THUMB: bne +; CHECK-THUMB: beq ; CHECK-THUMB: dmb {{ish$}} %pair = cmpxchg i64* %ptr, i64 %val1, i64 
%val2 seq_cst seq_cst diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll index 79138945661..f7ef492cd50 100644 --- a/llvm/test/CodeGen/ARM/atomic-op.ll +++ b/llvm/test/CodeGen/ARM/atomic-op.ll @@ -272,31 +272,37 @@ define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) { %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 -; CHECK-ARMV7: dmb ish -; CHECK-ARMV7: [[LOOP_BB:\.?LBB[0-9]+_1]]: ; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] ; CHECK-ARMV7: cmp [[OLDVAL]], r1 ; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]] +; CHECK-ARMV7: dmb ish +; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]: ; CHECK-ARMV7: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] ; CHECK-ARMV7: cmp [[SUCCESS]], #0 -; CHECK-ARMV7: bne [[LOOP_BB]] -; CHECK-ARMV7: dmb ish -; CHECK-ARMV7: bx lr +; CHECK-ARMV7: beq [[SUCCESS_BB:\.?LBB.*]] +; CHECK-ARMV7: ldrex [[OLDVAL]], [r[[ADDR]]] +; CHECK-ARMV7: cmp [[OLDVAL]], r1 +; CHECK-ARMV7: beq [[LOOP_BB]] ; CHECK-ARMV7: [[FAIL_BB]]: ; CHECK-ARMV7: clrex ; CHECK-ARMV7: bx lr +; CHECK-ARMV7: [[SUCCESS_BB]]: +; CHECK-ARMV7: dmb ish +; CHECK-ARMV7: bx lr -; CHECK-T2: dmb ish -; CHECK-T2: [[LOOP_BB:\.?LBB[0-9]+_1]]: ; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] ; CHECK-T2: cmp [[OLDVAL]], r1 -; CHECK-T2: clrexne -; CHECK-T2: bxne lr +; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]] +; CHECK-T2: dmb ish +; CHECK-T2: [[LOOP_BB:\.?LBB.*]]: ; CHECK-T2: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]] ; CHECK-T2: cmp [[SUCCESS]], #0 ; CHECK-T2: dmbeq ish ; CHECK-T2: bxeq lr -; CHECK-T2: b [[LOOP_BB]] +; CHECK-T2: ldrex [[OLDVAL]], [r[[ADDR]]] +; CHECK-T2: cmp [[OLDVAL]], r1 +; CHECK-T2: beq [[LOOP_BB]] +; CHECK-T2: clrex ret i32 %oldval } diff --git a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll index 81e05acfef7..283202f0cc1 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll @@ 
-3,26 +3,31 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { ; CHECK-LABEL: test_return: -; CHECK: dmb ishst - -; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: ldrex [[LOADED:r[0-9]+]], [r0] ; CHECK: cmp [[LOADED]], r1 ; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]] +; CHECK: dmb ishst + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0] -; CHECK: cmp [[STATUS]], #0 -; CHECK: bne [[LOOP]] +; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]] +; CHECK: ldrex [[LOADED]], [r0] +; CHECK: cmp [[LOADED]], r1 +; CHECK: beq [[LOOP]] + +; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: clrex ; CHECK: dmb ish -; CHECK: movs r0, #1 +; CHECK: movs r0, #0 ; CHECK: bx lr -; CHECK: [[FAILED]]: +; CHECK: [[SUCCESS]]: ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} ; CHECK: dmb ish -; CHECK: movs r0, #0 +; CHECK: movs r0, #1 ; CHECK: bx lr %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst @@ -34,26 +39,33 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) { define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { ; CHECK-LABEL: test_return_bool: -; CHECK: dmb ishst ; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1 -; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0] ; CHECK: cmp [[LOADED]], [[OLDBYTE]] ; CHECK: bne [[FAIL:LBB[0-9]+_[0-9]+]] +; CHECK: dmb ishst + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0] -; CHECK: cmp [[STATUS]], #0 -; CHECK: bne [[LOOP]] +; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]] + +; CHECK: ldrexb [[LOADED]], [r0] +; CHECK: cmp [[LOADED]], [[OLDBYTE]] +; CHECK: beq [[LOOP]] + ; FIXME: this eor is redundant. Need to teach DAG combine that. 
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} -; CHECK: movs [[TMP:r[0-9]+]], #1 +; CHECK: [[FAIL]]: +; CHECK: clrex +; CHECK: movs [[TMP:r[0-9]+]], #0 ; CHECK: eor r0, [[TMP]], #1 ; CHECK: bx lr -; CHECK: [[FAIL]]: -; CHECK: movs [[TMP:r[0-9]+]], #0 +; CHECK: [[SUCCESS]]: +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: movs [[TMP:r[0-9]+]], #1 ; CHECK: eor r0, [[TMP]], #1 ; CHECK: bx lr @@ -67,26 +79,31 @@ define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) { define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) { ; CHECK-LABEL: test_conditional: -; CHECK: dmb ishst - -; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: ldrex [[LOADED:r[0-9]+]], [r0] ; CHECK: cmp [[LOADED]], r1 ; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]] +; CHECK: dmb ishst + +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: ; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] -; CHECK: cmp [[STATUS]], #0 -; CHECK: bne [[LOOP]] +; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]] -; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} -; CHECK: dmb ish -; CHECK: b.w _bar +; CHECK: ldrex [[LOADED]], [r0] +; CHECK: cmp [[LOADED]], r1 +; CHECK: beq [[LOOP]] ; CHECK: [[FAILED]]: ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: clrex ; CHECK: dmb ish ; CHECK: b.w _baz +; CHECK: [[SUCCESS]]: +; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: dmb ish +; CHECK: b.w _bar + %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst %success = extractvalue { i32, i1 } %pair, 1 br i1 %success, label %true, label %false diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll index 1eac9c41cf9..4038528c91b 100644 --- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll +++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll @@ -6,11 +6,11 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) { %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 ; CHECK-NEXT: BB#0: -; CHECK-NEXT: dmb ish ; CHECK-NEXT: ldrex 
[[LOADED:r[0-9]+]], [r0] ; CHECK-NEXT: cmp [[LOADED]], r1 ; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: BB#1: +; CHECK-NEXT: dmb ish ; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-NEXT: cmp [[SUCCESS]], #0 ; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]] @@ -36,13 +36,13 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) { %success = extractvalue { i32, i1 } %pair, 1 ; CHECK-NEXT: BB#0: -; CHECK-NEXT: dmb ish ; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1] ; CHECK-NEXT: cmp [[LOADED]], r2 ; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]] ; CHECK-NEXT: BB#1: -; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] +; CHECK-NEXT: dmb ish ; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1] ; CHECK-NEXT: cmp [[SUCCESS]], #0 ; CHECK-NEXT: bxne lr ; CHECK-NEXT: dmb ish diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll index 4647e8fd6d9..31d970f7ef1 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -222,26 +222,37 @@ define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) { define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst -; CHECK: call void @llvm.arm.dmb(i32 11) -; CHECK: br label %[[LOOP:.*]] +; CHECK: br label %[[START:.*]] -; CHECK: [[LOOP]]: +; CHECK: [[START]]: ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) -; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8 +; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] -; CHECK: [[TRY_STORE]]: +; CHECK: [[FENCED_STORE]]: +; CHECK: call void @llvm.arm.dmb(i32 
11) +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]] + +; CHECK: [[RELEASED_LOAD]]: +; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr) +; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8 +; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired +; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]] ; CHECK: [[SUCCESS_BB]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] @@ -251,7 +262,8 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { ; CHECK: [[DONE]]: ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i8 [[OLDVAL]] +; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] +; CHECK: ret i8 [[LOADED]] %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst %old = extractvalue { i8, i1 } %pairold, 0 @@ -260,26 +272,37 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) { define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) { ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic -; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK: [[OLDVAL32:%.*]] = 
call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] -; CHECK: [[TRY_STORE]]: +; CHECK: [[FENCED_STORE]]: +; CHECK: call void @llvm.arm.dmb(i32 11) +; CHECK: br label %[[LOOP:.*]] + +; CHECK: [[LOOP]]: +; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ] ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr) ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0 -; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]] +; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]] + +; CHECK: [[RELEASED_LOAD]]: +; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr) +; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16 +; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired +; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]] ; CHECK: [[SUCCESS_BB]]: ; CHECK: call void @llvm.arm.dmb(i32 11) ; CHECK: br label %[[DONE:.*]] ; CHECK: [[NO_STORE_BB]]: +; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ] ; CHECK-NEXT: call void @llvm.arm.clrex() ; CHECK-NEXT: br label %[[FAILURE_BB:.*]] @@ -289,7 +312,8 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv ; CHECK: [[DONE]]: ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] -; CHECK: ret i16 [[OLDVAL]] +; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] +; CHECK: ret i16 [[LOADED]] %pairold = cmpxchg i16* %ptr, i16 
%desired, i16 %newval seq_cst monotonic %old = extractvalue { i16, i1 } %pairold, 0 @@ -378,3 +402,39 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n %old = extractvalue { i64, i1 } %pairold, 0 ret i64 %old } + +define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize { +; CHECK-LABEL: @test_cmpxchg_minsize +; CHECK: call void @llvm.arm.dmb(i32 11) +; CHECK: br label %[[START:.*]] + +; CHECK: [[START]]: +; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) +; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 +; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: call void @llvm.arm.dmb(i32 11) +; CHECK: br label %[[END:.*]] + +; CHECK: [[NO_STORE_BB]]: +; CHECK: call void @llvm.arm.clrex() +; CHECK: br label %[[FAILURE_BB]] + +; CHECK: [[FAILURE_BB]]: +; CHECK: call void @llvm.arm.dmb(i32 11) +; CHECK: br label %[[END]] + +; CHECK: [[END]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i32 [[LOADED]] + + %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst + %oldval = extractvalue { i32, i1 } %pair, 0 + ret i32 %oldval +} diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll index f9aa524fac9..02e4dd1f1d5 100644 --- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll @@ -3,13 +3,16 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { ; CHECK-LABEL: @test_cmpxchg_seq_cst ; Intrinsic for "dmb ishst" is then expected -; CHECK: call void @llvm.arm.dmb(i32 10) ; CHECK: br 
label %[[START:.*]] ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK: call void @llvm.arm.dmb(i32 10) +; CHECK: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) @@ -39,13 +42,16 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) { define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) { ; CHECK-LABEL: @test_cmpxchg_weak_fail -; CHECK: call void @llvm.arm.dmb(i32 10) ; CHECK: br label %[[START:.*]] ; CHECK: [[START]]: ; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired -; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]] +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK: call void @llvm.arm.dmb(i32 10) +; CHECK: br label %[[TRY_STORE:.*]] ; CHECK: [[TRY_STORE]]: ; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) @@ -108,3 +114,42 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) { %oldval = extractvalue { i32, i1 } %pair, 0 ret i32 %oldval } + +define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) minsize { +; CHECK-LABEL: @test_cmpxchg_seq_cst_minsize +; CHECK: br label %[[START:.*]] + +; CHECK: [[START]]: +; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr) +; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired +; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]] + +; CHECK: [[FENCED_STORE]]: +; CHECK: call void 
@llvm.arm.dmb(i32 10) +; CHECK: br label %[[TRY_STORE:.*]] + +; CHECK: [[TRY_STORE]]: +; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr) +; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0 +; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]] + +; CHECK: [[SUCCESS_BB]]: +; CHECK: call void @llvm.arm.dmb(i32 11) +; CHECK: br label %[[END:.*]] + +; CHECK: [[NO_STORE_BB]]: +; CHECK: call void @llvm.arm.clrex() +; CHECK: br label %[[FAILURE_BB]] + +; CHECK: [[FAILURE_BB]]: +; CHECK: call void @llvm.arm.dmb(i32 11) +; CHECK: br label %[[END]] + +; CHECK: [[END]]: +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] +; CHECK: ret i32 [[LOADED]] + + %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst + %oldval = extractvalue { i32, i1 } %pair, 0 + ret i32 %oldval +} |