diff options
59 files changed, 3537 insertions, 3255 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 3000a7d0c3f..639b588766a 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -462,20 +462,17 @@ class MachineBlockPlacement : public MachineFunctionPass { const MachineBasicBlock *ExitBB, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop, - const MachineLoop &L, - const BlockFilterSet &LoopBlockSet, - bool HasStaticProfileOnly = false); - MachineBasicBlock *findBestLoopTop( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); - MachineBasicBlock *findBestLoopTopNoProfile( + MachineBasicBlock *findBestLoopTop( const MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit( - const MachineLoop &L, const BlockFilterSet &LoopBlockSet); + const MachineLoop &L, const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq); BlockFilterSet collectLoopBlockSet(const MachineLoop &L); void buildLoopChains(const MachineLoop &L); void rotateLoop( BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, - const BlockFilterSet &LoopBlockSet); + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet); void rotateLoopWithProfile( BlockChain &LoopChain, const MachineLoop &L, const BlockFilterSet &LoopBlockSet); @@ -1950,14 +1947,11 @@ MachineBlockPlacement::FallThroughGains( /// At the same time, move it before old top increases the taken branch /// to loop exit block, so the reduced taken branch will be compared with /// the increased taken branch to the loop exit block. -/// -/// This pattern is enabled only when PlainMode is false. 
MachineBasicBlock * MachineBlockPlacement::findBestLoopTopHelper( MachineBasicBlock *OldTop, const MachineLoop &L, - const BlockFilterSet &LoopBlockSet, - bool HasStaticProfileOnly) { + const BlockFilterSet &LoopBlockSet) { // Check that the header hasn't been fused with a preheader block due to // crazy branches. If it has, we need to start with the header at the top to // prevent pulling the preheader into the loop body. @@ -1981,38 +1975,22 @@ MachineBlockPlacement::findBestLoopTopHelper( if (Pred->succ_size() > 2) continue; + MachineBasicBlock *OtherBB = nullptr; + if (Pred->succ_size() == 2) { + OtherBB = *Pred->succ_begin(); + if (OtherBB == OldTop) + OtherBB = *Pred->succ_rbegin(); + } + if (!canMoveBottomBlockToTop(Pred, OldTop)) continue; - if (HasStaticProfileOnly) { - // In plain mode we consider pattern 1 only. - if (Pred->succ_size() > 1) - continue; - - BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); - if (!BestPred || PredFreq > BestGains || - (!(PredFreq < BestGains) && - Pred->isLayoutSuccessor(OldTop))) { - BestPred = Pred; - BestGains = PredFreq; - } - } else { - // With profile information we also consider pattern 2. - MachineBasicBlock *OtherBB = nullptr; - if (Pred->succ_size() == 2) { - OtherBB = *Pred->succ_begin(); - if (OtherBB == OldTop) - OtherBB = *Pred->succ_rbegin(); - } - - // And more sophisticated cost model. - BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, - LoopBlockSet); - if ((Gains > 0) && (Gains > BestGains || - ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { - BestPred = Pred; - BestGains = Gains; - } + BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB, + LoopBlockSet); + if ((Gains > 0) && (Gains > BestGains || + ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) { + BestPred = Pred; + BestGains = Gains; } } @@ -2032,7 +2010,7 @@ MachineBlockPlacement::findBestLoopTopHelper( return BestPred; } -/// Find the best loop top block for layout in FDO mode. 
+/// Find the best loop top block for layout. /// /// This function iteratively calls findBestLoopTopHelper, until no new better /// BB can be found. @@ -2060,34 +2038,6 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, return NewTop; } -/// Find the best loop top block for layout in plain mode. It is less agressive -/// than findBestLoopTop. -/// -/// Look for a block which is strictly better than the loop header for laying -/// out at the top of the loop. This looks for one and only one pattern: -/// a latch block with no conditional exit. This block will cause a conditional -/// jump around it or will be the bottom of the loop if we lay it out in place, -/// but if it doesn't end up at the bottom of the loop for any reason, -/// rotation alone won't fix it. Because such a block will always result in an -/// unconditional jump (for the backedge) rotating it in front of the loop -/// header is always profitable. -MachineBasicBlock * -MachineBlockPlacement::findBestLoopTopNoProfile( - const MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { - // Placing the latch block before the header may introduce an extra branch - // that skips this block the first time the loop is executed, which we want - // to avoid when optimising for size. - // FIXME: in theory there is a case that does not introduce a new branch, - // i.e. when the layout predecessor does not fallthrough to the loop header. - // In practice this never happens though: there always seems to be a preheader - // that can fallthrough and that is also placed before the header. - if (F->getFunction().hasOptSize()) - return L.getHeader(); - - return findBestLoopTopHelper(L.getHeader(), L, LoopBlockSet, true); -} - /// Find the best loop exiting block for layout. /// /// This routine implements the logic to analyze the loop looking for the best @@ -2095,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTopNoProfile( /// fallthrough opportunities. 
MachineBasicBlock * MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { + const BlockFilterSet &LoopBlockSet, + BlockFrequency &ExitFreq) { // We don't want to layout the loop linearly in all cases. If the loop header // is just a normal basic block in the loop, we want to look for what block // within the loop is the best one to layout at the top. However, if the loop @@ -2206,6 +2157,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + ExitFreq = BestExitEdgeFreq; return ExitingBB; } @@ -2250,6 +2202,7 @@ MachineBlockPlacement::hasViableTopFallthrough( /// of its bottom already, don't rotate it. void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, + BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet) { if (!ExitingBB) return; @@ -2273,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, (!SuccChain || Succ == *SuccChain->begin())) return; } + + // Rotate will destroy the top fallthrough, we need to ensure the new exit + // frequency is larger than top fallthrough. + BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); + if (FallThrough2Top >= ExitFreq) + return; } BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); @@ -2524,10 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // loop. This will default to the header, but may end up as one of the // predecessors to the header if there is one which will result in strictly // fewer branches in the loop body. - MachineBasicBlock *LoopTop = - (RotateLoopWithProfile || F->getFunction().hasProfileData()) ? 
- findBestLoopTop(L, LoopBlockSet) : - findBestLoopTopNoProfile(L, LoopBlockSet); + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate @@ -2536,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // Loops are processed innermost to uttermost, make sure we clear // PreferredLoopExit before processing a new loop. PreferredLoopExit = nullptr; + BlockFrequency ExitFreq; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) - PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); + PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -2554,11 +2511,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { buildChain(LoopTop, LoopChain, &LoopBlockSet); - if (RotateLoopWithProfile) { - if (LoopTop == L.getHeader()) - rotateLoopWithProfile(LoopChain, L, LoopBlockSet); - } else - rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet); + if (RotateLoopWithProfile) + rotateLoopWithProfile(LoopChain, L, LoopBlockSet); + else + rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet); LLVM_DEBUG({ // Crash at the end so we get all of the debugging output first. 
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll index 088710c6737..3bafd8c3a35 100644 --- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -111,7 +111,7 @@ define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) { ; CHECK: mov w22, #2 ; CHECK-NOT: mov w22, #4 ; CHECK-NOT: cmn w22, #4 -; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]] +; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond ; CHECK-NOT: b.ne [[LOOP2]] ; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}} ; CHECK: bl _foo diff --git a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll index eab296d6a64..c40d6dab437 100644 --- a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll +++ b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll @@ -1,9 +1,8 @@ ; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s ; CHECK-LABEL: test: -; CHECK: LBB0_7: -; CHECK: b.hi -; CHECK-NEXT: b +; CHECK-LABEL: %cond.false12.i +; CHECK: b.gt ; CHECK-NEXT: LBB0_8: ; CHECK-NEXT: mov x8, x9 ; CHECK-NEXT: LBB0_9: diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index a425bcf2bdf..6a8456d99bc 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -230,6 +230,11 @@ bb.end: ; preds = %bb.then, %bb ; Make sure scc liveness is updated if sor_b64 is removed ; ALL-LABEL: {{^}}scc_liveness: +; GCN: %bb10 +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execz + ; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: ; GCN: s_andn2_b64 exec, exec, ; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] @@ -240,10 +245,6 @@ bb.end: ; preds = %bb.then, %bb ; GCN-NOT: s_or_b64 exec, exec ; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} -; GCN: s_andn2_b64 -; GCN-NEXT: s_cbranch_execnz - -; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} ; GCN: buffer_store_dword ; GCN: buffer_store_dword ; GCN: 
buffer_store_dword diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll index 2060ea42f16..c903a04039a 100644 --- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll +++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll @@ -20,38 +20,41 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9 ; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7 ; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3 -; CHECK-NEXT: BB0_1: ; %loop +; CHECK-NEXT: s_branch BB0_3 +; CHECK-NEXT: BB0_1: ; %Flow1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] +; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec +; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec +; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] +; CHECK-NEXT: s_cbranch_execz BB0_6 +; CHECK-NEXT: BB0_3: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc ; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec ; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec -; CHECK-NEXT: s_cbranch_vccz BB0_5 -; CHECK-NEXT: ; %bb.2: ; %endif1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: s_cbranch_vccz BB0_2 +; CHECK-NEXT: ; %bb.4: ; %endif1 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: s_mov_b64 s[6:7], -1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1] ; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9] -; CHECK-NEXT: ; mask branch BB0_4 -; CHECK-NEXT: BB0_3: ; %endif2 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: ; mask branch BB0_1 +; CHECK-NEXT: s_cbranch_execz BB0_1 +; CHECK-NEXT: 
BB0_5: ; %endif2 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: v_add_u32_e32 v1, 1, v1 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1 -; CHECK-NEXT: BB0_4: ; %Flow1 -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] -; CHECK-NEXT: s_mov_b64 s[8:9], 0 -; CHECK-NEXT: BB0_5: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7] -; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] -; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec -; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec -; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] -; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11] -; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] -; CHECK-NEXT: s_cbranch_execnz BB0_1 -; CHECK-NEXT: ; %bb.6: ; %Flow2 +; CHECK-NEXT: s_branch BB0_1 +; CHECK-NEXT: BB0_6: ; %Flow2 ; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] @@ -62,6 +65,7 @@ define amdgpu_ps void @main(i32, float) { ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; CHECK-NEXT: s_endpgm +; this is the divergent branch with the condition not marked as divergent start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) br label %loop diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll index be6e3fd05ae..1a675ce57bc 100644 --- a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -1,27 +1,28 @@ ; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s -; CHECK-LABEL: %bb11 +; CHECK-LABEL: %bb22 -; Load from %arg in a Loop body has alias store +; Load from %arg has alias store in Loop ; CHECK: flat_load_dword -; CHECK-LABEL: %bb20 -; CHECK: flat_store_dword +; ##################################################################### + +; Load from %arg1 has 
no-alias store in Loop - arg1[i+1] never alias arg1[i] + +; CHECK: s_load_dword ; ##################################################################### -; CHECK-LABEL: %bb22 +; CHECK-LABEL: %bb11 -; Load from %arg has alias store in Loop +; Load from %arg in a Loop body has alias store ; CHECK: flat_load_dword -; ##################################################################### - -; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i] +; CHECK-LABEL: %bb20 -; CHECK: s_load_dword +; CHECK: flat_store_dword define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { bb: diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll index ae78a1ecf32..a050bfe29bf 100644 --- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll @@ -3,20 +3,20 @@ ; SI-LABEL: {{^}}i1_copy_from_loop: ; +; SI: ; %Flow +; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec +; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec +; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] + ; SI: ; %for.body ; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4, -; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec +; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec ; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]] ; SI: ; %Flow1 ; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec -; SI: ; %Flow -; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec -; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec -; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]] - ; SI: ; %for.end ; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]] diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll 
b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index dc7f495c42f..bde1cd5c435 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -630,12 +630,7 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; GCN-LABEL: {{^}}broken_phi_bb: ; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8 -; GCN: s_branch [[BB2:BB[0-9]+_[0-9]+]] - -; GCN: {{^BB[0-9]+_[0-9]+}}: -; GCN: s_mov_b64 exec, - -; GCN: [[BB2]]: +; GCN: [[BB2:BB[0-9]+_[0-9]+]]: ; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]] ; GCN: buffer_load_dword @@ -647,6 +642,11 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace( ; IDXMODE: s_set_gpr_idx_off ; GCN: s_cbranch_execnz [[REGLOOP]] + +; GCN: {{^; %bb.[0-9]}}: +; GCN: s_mov_b64 exec, +; GCN: s_branch [[BB2]] + define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) #0 { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll index 486364acdb7..f374276aa60 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll @@ -61,9 +61,9 @@ loopexit: ; GCN-LABEL: {{^}}break_cond_is_arg: ; GCN: s_xor_b64 [[REG1:[^ ,]*]], {{[^ ,]*, -1$}} +; GCN: s_andn2_b64 exec, exec, [[REG3:[^ ,]*]] ; GCN: s_and_b64 [[REG2:[^ ,]*]], exec, [[REG1]] -; GCN: s_or_b64 [[REG3:[^ ,]*]], [[REG2]], -; GCN: s_andn2_b64 exec, exec, [[REG3]] +; GCN: s_or_b64 [[REG3]], [[REG2]], define void @break_cond_is_arg(i32 %arg, i1 %breakcond) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index d243233119b..0ae28c6ef79 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -24,13 +24,29 @@ ; GCN: ; %main_body ; GCN: s_mov_b64 [[LEFT_OUTER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: [[FLOW2:BB[0-9]+_[0-9]+]]: ; %Flow2 +; GCN: s_or_b64 
exec, exec, [[TMP0:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] +; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] +; GCN: s_andn2_b64 exec, exec, [[TMP1]] +; GCN: s_cbranch_execz [[IF_BLOCK:BB[0-9]+_[0-9]+]] + ; GCN: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP.outer{{$}} ; GCN: s_mov_b64 [[LEFT_INNER:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow +; GCN: s_or_b64 exec, exec, [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]] +; GCN: s_and_b64 [[TMP0]], exec, [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]] +; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] +; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] +; GCN: s_andn2_b64 exec, exec, [[TMP0]] +; GCN: s_cbranch_execz [[FLOW2]] + ; GCN: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %LOOP{{$}} -; GCN: s_or_b64 [[BREAK_OUTER:s\[[0-9]+:[0-9]+\]]], [[BREAK_OUTER]], exec -; GCN: s_or_b64 [[BREAK_INNER:s\[[0-9]+:[0-9]+\]]], [[BREAK_INNER]], exec -; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc +; GCN: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], exec +; GCN: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], exec +; GCN: s_and_saveexec_b64 [[SAVE_EXEC]], vcc ; FIXME: duplicate comparison ; GCN: ; %ENDIF @@ -43,23 +59,7 @@ ; GCN-DAG: s_or_b64 [[BREAK_OUTER]], [[BREAK_OUTER]], [[TMP_EQ]] ; GCN-DAG: s_or_b64 [[BREAK_INNER]], [[BREAK_INNER]], [[TMP_NE]] -; GCN: ; %Flow -; GCN: s_or_b64 exec, exec, [[SAVE_EXEC]] -; GCN: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_INNER]] -; GCN: s_or_b64 [[TMP0]], [[TMP0]], [[LEFT_INNER]] -; GCN: s_mov_b64 [[LEFT_INNER]], [[TMP0]] -; GCN: s_andn2_b64 exec, exec, [[TMP0]] -; GCN: s_cbranch_execnz [[INNER_LOOP]] - -; GCN: ; %Flow2 -; GCN: s_or_b64 exec, exec, [[TMP0]] -; GCN: s_and_b64 [[TMP1:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK_OUTER]] -; GCN: s_or_b64 [[TMP1]], [[TMP1]], [[LEFT_OUTER]] -; GCN: s_mov_b64 [[LEFT_OUTER]], [[TMP1]] -; GCN: s_andn2_b64 exec, exec, [[TMP1]] -; GCN: s_cbranch_execnz [[OUTER_LOOP]] - -; GCN: ; %IF +; 
GCN: [[IF_BLOCK]]: ; %IF ; GCN-NEXT: s_endpgm define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) { main_body: @@ -92,12 +92,18 @@ ENDIF: ; preds = %LOOP ; GCN-LABEL: {{^}}multi_if_break_loop: ; GCN: s_mov_b64 [[LEFT:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; GCN: ; %Flow4 +; GCN: s_and_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], exec, [[BREAK]] +; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]] +; GCN: s_andn2_b64 exec, exec, [[LEFT]] +; GCN-NEXT: s_cbranch_execz + ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]: ; %bb1{{$}} -; GCN: s_mov_b64 [[OLD_LEFT:s\[[0-9]+:[0-9]+\]]], [[LEFT]] +; GCN: s_mov_b64 [[OLD_LEFT]], [[LEFT]] ; GCN: ; %LeafBlock1 ; GCN: s_mov_b64 -; GCN: s_mov_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: s_mov_b64 [[BREAK]], -1{{$}} ; GCN: ; %case1 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], @@ -118,12 +124,6 @@ ENDIF: ; preds = %LOOP ; GCN-DAG: s_and_b64 [[TMP:s\[[0-9]+:[0-9]+\]]], vcc, exec ; GCN: s_or_b64 [[BREAK]], [[BREAK]], [[TMP]] -; GCN: ; %Flow4 -; GCN: s_and_b64 [[BREAK]], exec, [[BREAK]] -; GCN: s_or_b64 [[LEFT]], [[BREAK]], [[OLD_LEFT]] -; GCN: s_andn2_b64 exec, exec, [[LEFT]] -; GCN-NEXT: s_cbranch_execnz - define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 { bb: %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll index be5d8d47205..2be99267c4e 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll @@ -3,11 +3,11 @@ ; GCN-LABEL: {{^}}negated_cond: ; GCN: BB0_1: ; GCN: v_cmp_eq_u32_e64 [[CC:[^,]+]], -; GCN: BB0_2: +; GCN: BB0_3: ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccnz BB0_4 +; GCN: s_cbranch_vccnz BB0_2 define amdgpu_kernel void @negated_cond(i32 addrspace(1)* %arg1) { bb: br label %bb1 @@ -36,11 +36,11 @@ bb4: ; GCN-LABEL: {{^}}negated_cond_dominated_blocks: ; GCN: v_cmp_eq_u32_e64 
[[CC:[^,]+]], -; GCN: BB1_1: +; GCN: %bb4 ; GCN-NOT: v_cndmask_b32 ; GCN-NOT: v_cmp ; GCN: s_andn2_b64 vcc, exec, [[CC]] -; GCN: s_cbranch_vccz BB1_3 +; GCN: s_cbranch_vccnz BB1_1 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) { bb: br label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll index ad68d300de0..14928749e9f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -96,20 +96,20 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}loop_land_info_assert: ; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}} ; SI: s_and_b64 [[CMP4M:s\[[0-9]+:[0-9]+\]]], exec, [[CMP4]] -; SI: s_branch [[INFLOOP:BB[0-9]+_[0-9]+]] + +; SI: [[WHILELOOP:BB[0-9]+_[0-9]+]]: ; %while.cond +; SI: s_cbranch_vccz [[FOR_COND_PH:BB[0-9]+_[0-9]+]] ; SI: [[CONVEX_EXIT:BB[0-9_]+]] ; SI: s_mov_b64 vcc, ; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]] -; SI: s_cbranch_vccnz [[INFLOOP]] + +; SI: s_cbranch_vccnz [[WHILELOOP]] ; SI: ; %if.else ; SI: buffer_store_dword -; SI: [[INFLOOP]]: -; SI: s_cbranch_vccnz [[CONVEX_EXIT]] - -; SI: ; %for.cond.preheader +; SI: [[FOR_COND_PH]]: ; %for.cond.preheader ; SI: s_cbranch_vccz [[ENDPGM]] ; SI: [[ENDPGM]]: diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index e1da3cf8f46..a71ca5db765 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -166,30 +166,29 @@ endif: } ; GCN-LABEL: {{^}}test_loop_with_if: -; GCN: BB{{.*}}: ; %bb2 +; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}} +; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}} +; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}] +; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] +; GCN: s_cbranch_execz +; GCN: BB{{.*}}: ; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}} ; GCN: 
s_cbranch_execz -; GCN: BB{{.*}}: ; %bb5 -; GCN: BB{{.*}}: ; %Flow +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}} ; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}] ; GCN: ; mask branch BB -; GCN: BB{{.*}}: ; %bb11 -; GCN: BB{{.*}}: ; %Flow1 +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}} ; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}] ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}} ; GCN: ; mask branch BB -; GCN: BB{{.*}}: ; %bb10 -; GCN: BB{{.*}}: ; %bb13 -; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}} -; GFX1032: s_andn2_b32 exec_lo, exec_lo, s{{[0-9]+}} -; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}] -; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}] -; GCN: s_cbranch_execnz -; GCN: ; %bb1 +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GCN: s_endpgm define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 { bb: @@ -231,17 +230,16 @@ bb13: ; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}} ; GCN: ; mask branch ; GCN: s_cbranch_execz -; GCN: BB{{.*}}: ; %.preheader -; GCN: ; %bb8 +; GCN: BB{{.*}}: +; GCN: BB{{.*}}: ; GFX1032: s_andn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, exec_lo ; GFX1064: s_andn2_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], exec ; GFX1032: s_or_b32 s{{[0-9]+}}, vcc_lo, s{{[0-9]+}} ; GFX1032: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX1064: s_or_b64 s[{{[0-9:]+}}], vcc, s[{{[0-9:]+}}] ; GFX1064: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] -; GCN: BB{{.*}}: ; %Flow -; GCN: s_cbranch_execnz -; GCN: BB{{.*}}: ; %.loopexit +; GCN: s_cbranch_execz +; GCN: BB{{.*}}: define amdgpu_kernel void @test_loop_with_if_else_break(i32 addrspace(1)* %arg) #0 { bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -657,7 +655,7 @@ define amdgpu_gs void @test_kill_i1_terminator_i1(i32 %a, i32 %b, i32 %c, i32 %d ; GCN-LABEL: {{^}}test_loop_vcc: ; GFX1032: v_cmp_lt_f32_e32 
vcc_lo, ; GFX1064: v_cmp_lt_f32_e32 vcc, -; GCN: s_cbranch_vccz +; GCN: s_cbranch_vccnz define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) #0 { entry: br label %loop diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 41ff30b4fdd..b827668950b 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -650,12 +650,15 @@ main_body: ; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0 ; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000 -; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body -; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %loop ; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] -; CHECK: s_cbranch_vccz [[LOOPHDR]] -; CHECK: ; %break +; CHECK: s_cbranch_vccnz +; CHECK: ; %body +; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] +; CHECK: s_branch [[LOOPHDR]] + +; CHECK: ; %break ; CHECK: ; return define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind { entry: diff --git a/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll index 4567b7f5fe2..286b9655f22 100644 --- a/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll +++ b/llvm/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll @@ -26,7 +26,7 @@ bb1: ; preds = %bb bb2: ; preds = %bb1, %entry ; CHECK: cmp [[REG]], #0 -; CHECK: ble +; CHECK: bgt %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] %tries.0 = sub i32 2147483647, %indvar %tmp1 = icmp sgt i32 %tries.0, 0 diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll index 3da504691e7..9c5f5c7b7de 100644 --- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -47,9 +47,8 @@ tailrecurse.switch: ; preds = %tailrecurse ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: beq -; V8-NEXT: %sw.epilog -; V8-NEXT: bx lr +; V8-NEXT: bne +; V8-NEXT: %sw.bb switch i32 %and, label %sw.epilog [ i32 1, label 
%sw.bb i32 3, label %sw.bb6 diff --git a/llvm/test/CodeGen/ARM/atomic-cmp.ll b/llvm/test/CodeGen/ARM/atomic-cmp.ll index 7f41b7d93d1..2f2f9d88bc0 100644 --- a/llvm/test/CodeGen/ARM/atomic-cmp.ll +++ b/llvm/test/CodeGen/ARM/atomic-cmp.ll @@ -9,8 +9,8 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind { ; ARM: clrex ; T2-LABEL: t: -; T2: strexb ; T2: ldrexb +; T2: strexb ; T2: clrex %tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic %tmp1 = extractvalue { i8, i1 } %tmp0, 0 diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll index b5214f8d67e..59fb02654cd 100644 --- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll @@ -52,16 +52,16 @@ entry: ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8: ; CHECK-ARMV7-NEXT: .fnstart ; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 -; CHECK-ARMV7-NEXT: b [[TRY:.LBB[0-9_]+]] -; CHECK-ARMV7-NEXT: [[HEAD:.LBB[0-9_]+]]: -; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] +; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]: +; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS:r[0-9]+]], [r0] +; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 +; CHECK-ARMV7-NEXT: bne [[EXIT:.LBB[0-9_]+]] +; CHECK-ARMV7-NEXT: strexb [[SUCCESS]], r2, [r0] ; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0 ; CHECK-ARMV7-NEXT: moveq r0, #1 ; CHECK-ARMV7-NEXT: bxeq lr -; CHECK-ARMV7-NEXT: [[TRY]]: -; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0] -; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1 -; CHECK-ARMV7-NEXT: beq [[HEAD]] +; CHECK-ARMV7-NEXT: b [[TRY]] +; CHECK-ARMV7-NEXT: [[EXIT]]: ; CHECK-ARMV7-NEXT: mov r0, #0 ; CHECK-ARMV7-NEXT: clrex ; CHECK-ARMV7-NEXT: bx lr @@ -69,17 +69,17 @@ entry: ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8: ; CHECK-THUMBV7-NEXT: .fnstart ; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1 -; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]] -; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]: +; CHECK-THUMBV7-NEXT: [[TRYLD:.LBB[0-9_]+]] +; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] +; CHECK-THUMBV7-NEXT: cmp 
[[LD]], [[DESIRED]] +; CHECK-THUMBV7-NEXT: bne [[EXIT:.LBB[0-9_]+]] ; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0] ; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0 ; CHECK-THUMBV7-NEXT: itt eq ; CHECK-THUMBV7-NEXT: moveq r0, #1 ; CHECK-THUMBV7-NEXT: bxeq lr -; CHECK-THUMBV7-NEXT: [[TRYLD]]: -; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0] -; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]] -; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]] +; CHECK-THUMBV7-NEXT: b [[TRYLD]] +; CHECK-THUMBV7-NEXT: [[EXIT]]: ; CHECK-THUMBV7-NEXT: movs r0, #0 ; CHECK-THUMBV7-NEXT: clrex ; CHECK-THUMBV7-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll index 362a0902883..e0db88aa670 100644 --- a/llvm/test/CodeGen/ARM/code-placement.ll +++ b/llvm/test/CodeGen/ARM/code-placement.ll @@ -38,8 +38,9 @@ entry: br i1 %0, label %bb5, label %bb.nph15 bb1: ; preds = %bb2.preheader, %bb1 +; CHECK: LBB1_[[BB3:.]]: @ %bb3 ; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader -; CHECK: bmi LBB1_[[BB3:.]] +; CHECK: bmi LBB1_[[BB3]] %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2] %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1] %tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1] @@ -53,7 +54,6 @@ bb1: ; preds = %bb2.preheader, %bb1 bb3: ; preds = %bb1, %bb2.preheader ; CHECK: LBB1_[[BB1:.]]: @ %bb1 ; CHECK: bne LBB1_[[BB1]] -; CHECK: LBB1_[[BB3]]: @ %bb3 %sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2] %3 = add i32 %pass.011, 1 ; <i32> [#uses=2] %exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1] diff --git a/llvm/test/CodeGen/ARM/pr32578.ll b/llvm/test/CodeGen/ARM/pr32578.ll index 08ce8f813ee..b46bb5e8cbf 100644 --- a/llvm/test/CodeGen/ARM/pr32578.ll +++ b/llvm/test/CodeGen/ARM/pr32578.ll @@ -4,7 +4,7 @@ target triple = "armv7" ; CHECK-LABEL: func: ; CHECK: push {r11, lr} ; CHECK: vpush {d8} -; CHECK: b .LBB0_2 +; CHECK: .LBB0_1: @ %tailrecurse define 
arm_aapcscc double @func() { br label %tailrecurse diff --git a/llvm/test/CodeGen/Hexagon/bug6757-endloop.ll b/llvm/test/CodeGen/Hexagon/bug6757-endloop.ll index a4e593fa2ba..9fec47e54cd 100644 --- a/llvm/test/CodeGen/Hexagon/bug6757-endloop.ll +++ b/llvm/test/CodeGen/Hexagon/bug6757-endloop.ll @@ -4,10 +4,10 @@ ; This situation can arise due to tail duplication. ; CHECK: loop1([[LP:.LBB0_[0-9]+]] +; CHECK: endloop1 ; CHECK: [[LP]]: ; CHECK-NOT: loop1( ; CHECK: endloop1 -; CHECK: endloop1 %s.0 = type { i32, i8* } %s.1 = type { i32, i32, i32, i32 } diff --git a/llvm/test/CodeGen/Hexagon/early-if-merge-loop.ll b/llvm/test/CodeGen/Hexagon/early-if-merge-loop.ll index ab8b00d6c90..01983cfb969 100644 --- a/llvm/test/CodeGen/Hexagon/early-if-merge-loop.ll +++ b/llvm/test/CodeGen/Hexagon/early-if-merge-loop.ll @@ -2,9 +2,11 @@ ; Make sure that the loop in the end has only one basic block. ; CHECK-LABEL: fred +; CHECK: %b2 ; Rely on the comments, make sure the one for the loop header is present. ; CHECK: %loop -; CHECK-NOT: %should_merge +; CHECK: %should_merge +; CHECK: %exit target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/prof-early-if.ll b/llvm/test/CodeGen/Hexagon/prof-early-if.ll index a5215a9b351..b0f21110b7d 100644 --- a/llvm/test/CodeGen/Hexagon/prof-early-if.ll +++ b/llvm/test/CodeGen/Hexagon/prof-early-if.ll @@ -1,8 +1,8 @@ ; RUN: llc -O2 -march=hexagon < %s | FileCheck %s ; Rely on the comments generated by llc. Check that "if.then" was not predicated. 
+; CHECK: b5 ; CHECK: b2 ; CHECK-NOT: if{{.*}}memd -; CHECK: b5 %s.0 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %s.1], [5 x i32] } %s.1 = type { i32, i32 } diff --git a/llvm/test/CodeGen/Hexagon/redundant-branching2.ll b/llvm/test/CodeGen/Hexagon/redundant-branching2.ll index 50007007d17..e9305e9fd27 100644 --- a/llvm/test/CodeGen/Hexagon/redundant-branching2.ll +++ b/llvm/test/CodeGen/Hexagon/redundant-branching2.ll @@ -3,9 +3,9 @@ ; CHECK: memub ; CHECK: memub +; CHECK: cmp.eq ; CHECK: memub ; CHECK-NOT: if{{.*}}jump .LBB -; CHECK: cmp.eq target triple = "hexagon-unknown--elf" diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll index 448f254aa88..7cfa4d2f6f0 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll @@ -401,16 +401,15 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test40: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 -; PPC64LE-NEXT: b .LBB40_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB40_1: -; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB40_2: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB40_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB40_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stbcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB40_1 +; PPC64LE-NEXT: .LBB40_3: ; PPC64LE-NEXT: stbcx. 
6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic @@ -466,16 +465,15 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB43_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB43_1: -; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB43_2: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB43_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB43_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stbcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB43_1 +; PPC64LE-NEXT: .LBB43_3: ; PPC64LE-NEXT: stbcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release monotonic @@ -487,16 +485,15 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB44_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB44_1: -; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB44_2: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB44_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB44_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stbcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB44_1 +; PPC64LE-NEXT: .LBB44_3: ; PPC64LE-NEXT: stbcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire @@ -622,16 +619,15 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test50: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 -; PPC64LE-NEXT: b .LBB50_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB50_1: -; PPC64LE-NEXT: sthcx. 
5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB50_2: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB50_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB50_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: sthcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB50_1 +; PPC64LE-NEXT: .LBB50_3: ; PPC64LE-NEXT: sthcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic @@ -687,16 +683,15 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB53_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB53_1: -; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB53_2: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB53_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB53_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: sthcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB53_1 +; PPC64LE-NEXT: .LBB53_3: ; PPC64LE-NEXT: sthcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release monotonic @@ -708,16 +703,15 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB54_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB54_1: -; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB54_2: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB54_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB54_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: sthcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB54_1 +; PPC64LE-NEXT: .LBB54_3: ; PPC64LE-NEXT: sthcx. 
6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire @@ -842,16 +836,15 @@ define void @test59(i16* %ptr, i16 %cmp, i16 %val) { define void @test60(i32* %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test60: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: b .LBB60_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB60_1: -; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB60_2: ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB60_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB60_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stwcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB60_1 +; PPC64LE-NEXT: .LBB60_3: ; PPC64LE-NEXT: stwcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic @@ -904,16 +897,15 @@ define void @test63(i32* %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test63: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB63_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB63_1: -; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB63_2: ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB63_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB63_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stwcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB63_1 +; PPC64LE-NEXT: .LBB63_3: ; PPC64LE-NEXT: stwcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release monotonic @@ -924,16 +916,15 @@ define void @test64(i32* %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test64: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB64_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB64_1: -; PPC64LE-NEXT: stwcx. 
5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB64_2: ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB64_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB64_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stwcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB64_1 +; PPC64LE-NEXT: .LBB64_3: ; PPC64LE-NEXT: stwcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire @@ -1053,16 +1044,15 @@ define void @test69(i32* %ptr, i32 %cmp, i32 %val) { define void @test70(i64* %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test70: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: b .LBB70_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB70_1: -; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB70_2: ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpd 4, 6 -; PPC64LE-NEXT: beq 0, .LBB70_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB70_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stdcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB70_1 +; PPC64LE-NEXT: .LBB70_3: ; PPC64LE-NEXT: stdcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic @@ -1115,16 +1105,15 @@ define void @test73(i64* %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test73: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB73_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB73_1: -; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB73_2: ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpd 4, 6 -; PPC64LE-NEXT: beq 0, .LBB73_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB73_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stdcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB73_1 +; PPC64LE-NEXT: .LBB73_3: ; PPC64LE-NEXT: stdcx. 
6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release monotonic @@ -1135,16 +1124,15 @@ define void @test74(i64* %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test74: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB74_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB74_1: -; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB74_2: ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpd 4, 6 -; PPC64LE-NEXT: beq 0, .LBB74_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB74_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stdcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB74_1 +; PPC64LE-NEXT: .LBB74_3: ; PPC64LE-NEXT: stdcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire @@ -1265,16 +1253,15 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test80: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 -; PPC64LE-NEXT: b .LBB80_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB80_1: -; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB80_2: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB80_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB80_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stbcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB80_1 +; PPC64LE-NEXT: .LBB80_3: ; PPC64LE-NEXT: stbcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic @@ -1330,16 +1317,15 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB83_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB83_1: -; PPC64LE-NEXT: stbcx. 
5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB83_2: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB83_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB83_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stbcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB83_1 +; PPC64LE-NEXT: .LBB83_3: ; PPC64LE-NEXT: stbcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic @@ -1351,16 +1337,15 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB84_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB84_1: -; PPC64LE-NEXT: stbcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB84_2: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB84_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB84_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stbcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB84_1 +; PPC64LE-NEXT: .LBB84_3: ; PPC64LE-NEXT: stbcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire @@ -1486,16 +1471,15 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test90: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 -; PPC64LE-NEXT: b .LBB90_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB90_1: -; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB90_2: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB90_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB90_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: sthcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB90_1 +; PPC64LE-NEXT: .LBB90_3: ; PPC64LE-NEXT: sthcx. 
6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic @@ -1551,16 +1535,15 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB93_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB93_1: -; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB93_2: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB93_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB93_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: sthcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB93_1 +; PPC64LE-NEXT: .LBB93_3: ; PPC64LE-NEXT: sthcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic @@ -1572,16 +1555,15 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB94_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB94_1: -; PPC64LE-NEXT: sthcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB94_2: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB94_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB94_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: sthcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB94_1 +; PPC64LE-NEXT: .LBB94_3: ; PPC64LE-NEXT: sthcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire @@ -1706,16 +1688,15 @@ define void @test99(i16* %ptr, i16 %cmp, i16 %val) { define void @test100(i32* %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test100: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: b .LBB100_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB100_1: -; PPC64LE-NEXT: stwcx. 
5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB100_2: ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB100_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB100_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stwcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB100_1 +; PPC64LE-NEXT: .LBB100_3: ; PPC64LE-NEXT: stwcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic @@ -1768,16 +1749,15 @@ define void @test103(i32* %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test103: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB103_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB103_1: -; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB103_2: ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB103_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB103_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stwcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB103_1 +; PPC64LE-NEXT: .LBB103_3: ; PPC64LE-NEXT: stwcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic @@ -1788,16 +1768,15 @@ define void @test104(i32* %ptr, i32 %cmp, i32 %val) { ; PPC64LE-LABEL: test104: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB104_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB104_1: -; PPC64LE-NEXT: stwcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB104_2: ; PPC64LE-NEXT: lwarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 -; PPC64LE-NEXT: beq 0, .LBB104_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB104_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stwcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB104_1 +; PPC64LE-NEXT: .LBB104_3: ; PPC64LE-NEXT: stwcx. 
6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire @@ -1917,16 +1896,15 @@ define void @test109(i32* %ptr, i32 %cmp, i32 %val) { define void @test110(i64* %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test110: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: b .LBB110_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB110_1: -; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB110_2: ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpd 4, 6 -; PPC64LE-NEXT: beq 0, .LBB110_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB110_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stdcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB110_1 +; PPC64LE-NEXT: .LBB110_3: ; PPC64LE-NEXT: stdcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic @@ -1979,16 +1957,15 @@ define void @test113(i64* %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test113: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB113_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB113_1: -; PPC64LE-NEXT: stdcx. 5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB113_2: ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpd 4, 6 -; PPC64LE-NEXT: beq 0, .LBB113_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB113_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stdcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB113_1 +; PPC64LE-NEXT: .LBB113_3: ; PPC64LE-NEXT: stdcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic @@ -1999,16 +1976,15 @@ define void @test114(i64* %ptr, i64 %cmp, i64 %val) { ; PPC64LE-LABEL: test114: ; PPC64LE: # %bb.0: ; PPC64LE-NEXT: lwsync -; PPC64LE-NEXT: b .LBB114_2 -; PPC64LE-NEXT: .p2align 5 ; PPC64LE-NEXT: .LBB114_1: -; PPC64LE-NEXT: stdcx. 
5, 0, 3 -; PPC64LE-NEXT: beqlr 0 -; PPC64LE-NEXT: .LBB114_2: ; PPC64LE-NEXT: ldarx 6, 0, 3 ; PPC64LE-NEXT: cmpd 4, 6 -; PPC64LE-NEXT: beq 0, .LBB114_1 -; PPC64LE-NEXT: # %bb.3: +; PPC64LE-NEXT: bne 0, .LBB114_3 +; PPC64LE-NEXT: # %bb.2: +; PPC64LE-NEXT: stdcx. 5, 0, 3 +; PPC64LE-NEXT: beqlr 0 +; PPC64LE-NEXT: b .LBB114_1 +; PPC64LE-NEXT: .LBB114_3: ; PPC64LE-NEXT: stdcx. 6, 0, 3 ; PPC64LE-NEXT: blr %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire diff --git a/llvm/test/CodeGen/PowerPC/block-placement-1.mir b/llvm/test/CodeGen/PowerPC/block-placement-1.mir index e756ba3aa46..01967e46da9 100644 --- a/llvm/test/CodeGen/PowerPC/block-placement-1.mir +++ b/llvm/test/CodeGen/PowerPC/block-placement-1.mir @@ -298,14 +298,14 @@ body: | bb.11.unreachable: + ; CHECK: bb.1.for.body: + ; CHECK: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) + ; CHECK: B %bb.2 + ; CHECK: bb.4.catch4: ; CHECK: successors: %bb.11(0x7ffff800), %bb.6(0x00000800) ; CHECK: B %bb.11 - ; CHECK: bb.1.for.body (align 4): - ; CHECK: successors: %bb.2(0x7ffff800), %bb.3(0x00000800) - ; CHECK: B %bb.2 - ; CHECK: bb.2..noexc: ; CHECK: bb.11.unreachable: diff --git a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll index 6bc8b8a041c..95fcb1796c8 100644 --- a/llvm/test/CodeGen/PowerPC/cmp_elimination.ll +++ b/llvm/test/CodeGen/PowerPC/cmp_elimination.ll @@ -718,13 +718,14 @@ if.end: define void @func28(i32 signext %a) { ; CHECK-LABEL: @func28 ; CHECK: cmplwi [[REG1:[0-9]+]], [[REG2:[0-9]+]] -; CHECK: .[[LABEL1:[A-Z0-9_]+]]: +; CHECK: .[[LABEL2:[A-Z0-9_]+]]: +; CHECK: cmpwi [[REG1]], [[REG2]] +; CHECK: ble 0, .[[LABEL1:[A-Z0-9_]+]] ; CHECK-NOT: cmp -; CHECK: bne 0, .[[LABEL2:[A-Z0-9_]+]] +; CHECK: bne 0, .[[LABEL2]] ; CHECK: bl dummy1 -; CHECK: .[[LABEL2]]: -; CHECK: cmpwi [[REG1]], [[REG2]] -; CHECK: bgt 0, .[[LABEL1]] +; CHECK: b .[[LABEL2]] +; CHECK: .[[LABEL1]]: ; CHECK: blr entry: br label %do.body diff --git 
a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll index 045f7a4c07c..9fab26b6222 100644 --- a/llvm/test/CodeGen/PowerPC/licm-remat.ll +++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll @@ -24,8 +24,7 @@ define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompres ; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l ; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha ; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l -; CHECK: b .[[LABEL1:[A-Z0-9_]+]] -; CHECK: .[[LABEL1]]: # %for.cond +; CHECK: .LBB0_2: # %for.cond ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL8wordmaskE@toc@ha ; CHECK-NOT: addis {{[0-9]+}}, 2, _ZN6snappy8internalL10char_tableE@toc@ha ; CHECK: bctrl diff --git a/llvm/test/CodeGen/PowerPC/machine-pre.ll b/llvm/test/CodeGen/PowerPC/machine-pre.ll index 2d7a5619c63..596c0a4624c 100644 --- a/llvm/test/CodeGen/PowerPC/machine-pre.ll +++ b/llvm/test/CodeGen/PowerPC/machine-pre.ll @@ -75,8 +75,19 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) local_unnamed_ ; CHECK-P9-NEXT: lis r3, 21845 ; CHECK-P9-NEXT: add r28, r30, r29 ; CHECK-P9-NEXT: ori r27, r3, 21846 +; CHECK-P9-NEXT: b .LBB1_4 ; CHECK-P9-NEXT: .p2align 4 -; CHECK-P9-NEXT: .LBB1_1: # %while.cond +; CHECK-P9-NEXT: .LBB1_1: # %sw.bb3 +; CHECK-P9-NEXT: # +; CHECK-P9-NEXT: mulli r3, r30, 23 +; CHECK-P9-NEXT: .LBB1_2: # %sw.epilog +; CHECK-P9-NEXT: # +; CHECK-P9-NEXT: add r28, r3, r28 +; CHECK-P9-NEXT: .LBB1_3: # %sw.epilog +; CHECK-P9-NEXT: # +; CHECK-P9-NEXT: cmpwi r28, 1025 +; CHECK-P9-NEXT: bge cr0, .LBB1_7 +; CHECK-P9-NEXT: .LBB1_4: # %while.cond ; CHECK-P9-NEXT: # ; CHECK-P9-NEXT: extsw r3, r29 ; CHECK-P9-NEXT: bl bar @@ -95,27 +106,16 @@ define dso_local signext i32 @foo(i32 signext %x, i32 signext %y) local_unnamed_ ; CHECK-P9-NEXT: add r4, r4, r5 ; CHECK-P9-NEXT: subf r3, r4, r3 ; CHECK-P9-NEXT: cmplwi r3, 1 -; CHECK-P9-NEXT: beq cr0, .LBB1_4 -; CHECK-P9-NEXT: # %bb.2: # 
%while.cond +; CHECK-P9-NEXT: beq cr0, .LBB1_1 +; CHECK-P9-NEXT: # %bb.5: # %while.cond ; CHECK-P9-NEXT: # ; CHECK-P9-NEXT: cmplwi r3, 0 -; CHECK-P9-NEXT: bne cr0, .LBB1_6 -; CHECK-P9-NEXT: # %bb.3: # %sw.bb +; CHECK-P9-NEXT: bne cr0, .LBB1_3 +; CHECK-P9-NEXT: # %bb.6: # %sw.bb ; CHECK-P9-NEXT: # ; CHECK-P9-NEXT: mulli r3, r29, 13 -; CHECK-P9-NEXT: b .LBB1_5 -; CHECK-P9-NEXT: .p2align 4 -; CHECK-P9-NEXT: .LBB1_4: # %sw.bb3 -; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: mulli r3, r30, 23 -; CHECK-P9-NEXT: .LBB1_5: # %sw.epilog -; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: add r28, r3, r28 -; CHECK-P9-NEXT: .LBB1_6: # %sw.epilog -; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: cmpwi r28, 1025 -; CHECK-P9-NEXT: blt cr0, .LBB1_1 -; CHECK-P9-NEXT: # %bb.7: # %while.end +; CHECK-P9-NEXT: b .LBB1_2 +; CHECK-P9-NEXT: .LBB1_7: # %while.end ; CHECK-P9-NEXT: lis r3, -13108 ; CHECK-P9-NEXT: ori r3, r3, 52429 ; CHECK-P9-NEXT: mullw r3, r28, r3 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 7fcf8b0b4ef..c12a298b15e 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2083,17 +2083,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB35_2 ; RV32I-NEXT: .LBB35_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB35_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB35_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB35_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB35_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -2102,8 +2094,18 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call 
__atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB35_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB35_4 +; RV32I-NEXT: .LBB35_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB35_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB35_1 +; RV32I-NEXT: .LBB35_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2156,17 +2158,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB35_2 ; RV64I-NEXT: .LBB35_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB35_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB35_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB35_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB35_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -2175,8 +2169,18 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB35_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB35_4 +; RV64I-NEXT: .LBB35_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB35_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in 
Loop: Header=BB35_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB35_1 +; RV64I-NEXT: .LBB35_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2233,17 +2237,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB36_2 ; RV32I-NEXT: .LBB36_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB36_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB36_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB36_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB36_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB36_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -2252,8 +2248,18 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB36_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB36_4 +; RV32I-NEXT: .LBB36_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB36_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB36_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB36_1 +; RV32I-NEXT: .LBB36_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2306,17 +2312,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB36_2 ; RV64I-NEXT: .LBB36_1: # %atomicrmw.start -; RV64I-NEXT: # =>This 
Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB36_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB36_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB36_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB36_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB36_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -2325,8 +2323,18 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB36_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB36_4 +; RV64I-NEXT: .LBB36_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB36_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB36_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB36_1 +; RV64I-NEXT: .LBB36_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2383,17 +2391,9 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB37_2 ; RV32I-NEXT: .LBB37_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB37_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB37_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB37_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB37_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB37_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv 
a1, s3 @@ -2402,8 +2402,18 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB37_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB37_4 +; RV32I-NEXT: .LBB37_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB37_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB37_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB37_1 +; RV32I-NEXT: .LBB37_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2456,17 +2466,9 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB37_2 ; RV64I-NEXT: .LBB37_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB37_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB37_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB37_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB37_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB37_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -2475,8 +2477,18 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB37_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB37_4 +; RV64I-NEXT: .LBB37_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv 
a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB37_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB37_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB37_1 +; RV64I-NEXT: .LBB37_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2533,17 +2545,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB38_2 ; RV32I-NEXT: .LBB38_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB38_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB38_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB38_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB38_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB38_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -2552,8 +2556,18 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB38_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB38_4 +; RV32I-NEXT: .LBB38_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB38_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB38_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB38_1 +; RV32I-NEXT: .LBB38_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2606,17 +2620,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: 
addi s3, sp, 7 +; RV64I-NEXT: j .LBB38_2 ; RV64I-NEXT: .LBB38_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB38_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB38_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB38_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB38_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB38_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -2625,8 +2631,18 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB38_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB38_4 +; RV64I-NEXT: .LBB38_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB38_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB38_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB38_1 +; RV64I-NEXT: .LBB38_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2683,17 +2699,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB39_2 ; RV32I-NEXT: .LBB39_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB39_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB39_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB39_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB39_1 Depth=1 +; 
RV32I-NEXT: # in Loop: Header=BB39_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -2702,8 +2710,18 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB39_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB39_4 +; RV32I-NEXT: .LBB39_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB39_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB39_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB39_1 +; RV32I-NEXT: .LBB39_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2756,17 +2774,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB39_2 ; RV64I-NEXT: .LBB39_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB39_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB39_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB39_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB39_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB39_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -2775,8 +2785,18 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB39_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB39_4 +; RV64I-NEXT: .LBB39_2: # %atomicrmw.start +; RV64I-NEXT: # 
=>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB39_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB39_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB39_1 +; RV64I-NEXT: .LBB39_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2833,17 +2853,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB40_2 ; RV32I-NEXT: .LBB40_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB40_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB40_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB40_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB40_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB40_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -2852,8 +2864,18 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB40_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB40_4 +; RV32I-NEXT: .LBB40_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB40_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB40_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB40_1 +; RV32I-NEXT: .LBB40_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -2906,17 +2928,9 @@ define i8 
@atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB40_2 ; RV64I-NEXT: .LBB40_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB40_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB40_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB40_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB40_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB40_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -2925,8 +2939,18 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB40_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB40_4 +; RV64I-NEXT: .LBB40_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB40_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB40_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB40_1 +; RV64I-NEXT: .LBB40_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -2983,17 +3007,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB41_2 ; RV32I-NEXT: .LBB41_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB41_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB41_1 
Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB41_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB41_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB41_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3002,8 +3018,18 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB41_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB41_4 +; RV32I-NEXT: .LBB41_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB41_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB41_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB41_1 +; RV32I-NEXT: .LBB41_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3056,17 +3082,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB41_2 ; RV64I-NEXT: .LBB41_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB41_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB41_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB41_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB41_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB41_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3075,8 +3093,18 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB41_1 -; 
RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB41_4 +; RV64I-NEXT: .LBB41_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB41_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB41_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB41_1 +; RV64I-NEXT: .LBB41_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3133,17 +3161,9 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB42_2 ; RV32I-NEXT: .LBB42_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB42_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB42_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB42_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB42_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB42_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3152,8 +3172,18 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB42_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB42_4 +; RV32I-NEXT: .LBB42_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB42_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB42_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB42_1 +; RV32I-NEXT: .LBB42_4: # %atomicrmw.end 
; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3206,17 +3236,9 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB42_2 ; RV64I-NEXT: .LBB42_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB42_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB42_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB42_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB42_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB42_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3225,8 +3247,18 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB42_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB42_4 +; RV64I-NEXT: .LBB42_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB42_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB42_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB42_1 +; RV64I-NEXT: .LBB42_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3283,17 +3315,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB43_2 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; 
RV32I-NEXT: bge s0, a1, .LBB43_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB43_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB43_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB43_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3302,8 +3326,18 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB43_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB43_4 +; RV32I-NEXT: .LBB43_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB43_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB43_1 +; RV32I-NEXT: .LBB43_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3356,17 +3390,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB43_2 ; RV64I-NEXT: .LBB43_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB43_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB43_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB43_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB43_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3375,8 +3401,18 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call 
__atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB43_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB43_4 +; RV64I-NEXT: .LBB43_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB43_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB43_1 +; RV64I-NEXT: .LBB43_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3433,17 +3469,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 24 ; RV32I-NEXT: srai s0, a1, 24 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB44_2 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 24 -; RV32I-NEXT: srai a1, a1, 24 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB44_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB44_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB44_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB44_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3452,8 +3480,18 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB44_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB44_4 +; RV32I-NEXT: .LBB44_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB44_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: 
Header=BB44_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB44_1 +; RV32I-NEXT: .LBB44_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3506,17 +3544,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 56 ; RV64I-NEXT: srai s0, a1, 56 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB44_2 ; RV64I-NEXT: .LBB44_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 56 -; RV64I-NEXT: srai a1, a1, 56 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB44_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB44_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB44_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB44_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3525,8 +3555,18 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB44_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB44_4 +; RV64I-NEXT: .LBB44_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB44_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB44_1 +; RV64I-NEXT: .LBB44_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3582,16 +3622,9 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner 
Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB45_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB45_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB45_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB45_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3600,8 +3633,17 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB45_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB45_4 +; RV32I-NEXT: .LBB45_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a1, .LBB45_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB45_1 +; RV32I-NEXT: .LBB45_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3648,16 +3690,9 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB45_2 ; RV64I-NEXT: .LBB45_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB45_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB45_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB45_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB45_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3666,8 +3701,17 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) 
nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB45_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB45_4 +; RV64I-NEXT: .LBB45_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB45_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB45_1 +; RV64I-NEXT: .LBB45_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3718,16 +3762,9 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB46_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB46_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB46_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB46_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3736,8 +3773,17 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB46_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB46_4 +; RV32I-NEXT: .LBB46_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a1, .LBB46_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; 
RV32I-NEXT: j .LBB46_1 +; RV32I-NEXT: .LBB46_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3784,16 +3830,9 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB46_2 ; RV64I-NEXT: .LBB46_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB46_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB46_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB46_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB46_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3802,8 +3841,17 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB46_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB46_4 +; RV64I-NEXT: .LBB46_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB46_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB46_1 +; RV64I-NEXT: .LBB46_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3854,16 +3902,9 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB47_2 ; RV32I-NEXT: .LBB47_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, 
.LBB47_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB47_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB47_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB47_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -3872,8 +3913,17 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB47_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB47_4 +; RV32I-NEXT: .LBB47_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a1, .LBB47_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB47_1 +; RV32I-NEXT: .LBB47_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -3920,16 +3970,9 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB47_2 ; RV64I-NEXT: .LBB47_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB47_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB47_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB47_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB47_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -3938,8 +3981,17 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; 
RV64I-NEXT: beqz a1, .LBB47_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB47_4 +; RV64I-NEXT: .LBB47_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB47_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB47_1 +; RV64I-NEXT: .LBB47_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -3990,16 +4042,9 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB48_2 ; RV32I-NEXT: .LBB48_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB48_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB48_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB48_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB48_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -4008,8 +4053,17 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB48_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB48_4 +; RV32I-NEXT: .LBB48_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a1, .LBB48_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB48_1 +; RV32I-NEXT: .LBB48_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 
16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4056,16 +4110,9 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB48_2 ; RV64I-NEXT: .LBB48_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB48_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB48_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB48_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB48_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4074,8 +4121,17 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB48_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB48_4 +; RV64I-NEXT: .LBB48_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB48_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB48_1 +; RV64I-NEXT: .LBB48_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4126,16 +4182,9 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB49_2 ; RV32I-NEXT: .LBB49_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s0, a1, .LBB49_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB49_1 Depth=1 -; RV32I-NEXT: mv 
a2, s2 -; RV32I-NEXT: .LBB49_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB49_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -4144,8 +4193,17 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB49_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB49_4 +; RV32I-NEXT: .LBB49_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a1, .LBB49_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB49_1 +; RV32I-NEXT: .LBB49_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4192,16 +4250,9 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB49_2 ; RV64I-NEXT: .LBB49_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB49_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB49_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB49_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB49_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4210,8 +4261,17 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB49_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB49_4 +; RV64I-NEXT: 
.LBB49_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB49_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB49_1 +; RV64I-NEXT: .LBB49_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4262,16 +4322,9 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB50_2 ; RV32I-NEXT: .LBB50_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB50_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB50_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB50_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB50_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -4280,8 +4333,17 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB50_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB50_4 +; RV32I-NEXT: .LBB50_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a1, .LBB50_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB50_1 +; RV32I-NEXT: .LBB50_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4328,16 +4390,9 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) 
nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB50_2 ; RV64I-NEXT: .LBB50_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB50_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB50_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB50_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB50_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4346,8 +4401,17 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB50_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB50_4 +; RV64I-NEXT: .LBB50_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB50_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB50_1 +; RV64I-NEXT: .LBB50_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4398,16 +4462,9 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB51_2 ; RV32I-NEXT: .LBB51_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB51_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB51_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB51_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB51_1 Depth=1 +; 
RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -4416,8 +4473,17 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB51_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB51_4 +; RV32I-NEXT: .LBB51_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a1, .LBB51_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB51_1 +; RV32I-NEXT: .LBB51_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4464,16 +4530,9 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB51_2 ; RV64I-NEXT: .LBB51_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB51_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB51_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB51_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB51_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4482,8 +4541,17 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB51_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB51_4 +; RV64I-NEXT: .LBB51_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, 
a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB51_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB51_1 +; RV64I-NEXT: .LBB51_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4534,16 +4602,9 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB52_2 ; RV32I-NEXT: .LBB52_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB52_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB52_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB52_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB52_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -4552,8 +4613,17 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB52_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB52_4 +; RV32I-NEXT: .LBB52_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a1, .LBB52_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB52_1 +; RV32I-NEXT: .LBB52_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4600,16 +4670,9 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j 
.LBB52_2 ; RV64I-NEXT: .LBB52_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB52_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB52_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB52_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB52_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4618,8 +4681,17 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB52_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB52_4 +; RV64I-NEXT: .LBB52_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB52_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB52_1 +; RV64I-NEXT: .LBB52_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4670,16 +4742,9 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB53_2 ; RV32I-NEXT: .LBB53_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB53_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB53_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB53_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB53_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ 
-4688,8 +4753,17 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB53_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB53_4 +; RV32I-NEXT: .LBB53_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a1, .LBB53_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB53_1 +; RV32I-NEXT: .LBB53_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4736,16 +4810,9 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB53_2 ; RV64I-NEXT: .LBB53_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB53_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB53_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB53_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB53_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4754,8 +4821,17 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB53_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB53_4 +; RV64I-NEXT: .LBB53_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB53_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; 
RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB53_1 +; RV64I-NEXT: .LBB53_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -4806,16 +4882,9 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s0, a1, 255 ; RV32I-NEXT: addi s3, sp, 11 +; RV32I-NEXT: j .LBB54_2 ; RV32I-NEXT: .LBB54_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a1, a0, 255 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s0, a1, .LBB54_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB54_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB54_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB54_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 ; RV32I-NEXT: sb a0, 11(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -4824,8 +4893,17 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_1 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lb a0, 11(sp) -; RV32I-NEXT: beqz a1, .LBB54_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB54_4 +; RV32I-NEXT: .LBB54_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a1, .LBB54_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB54_1 +; RV32I-NEXT: .LBB54_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -4872,16 +4950,9 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s0, a1, 255 ; RV64I-NEXT: addi s3, sp, 7 +; RV64I-NEXT: j .LBB54_2 ; RV64I-NEXT: .LBB54_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: 
andi a1, a0, 255 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB54_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB54_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB54_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB54_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 ; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -4890,8 +4961,17 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_1 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lb a0, 7(sp) -; RV64I-NEXT: beqz a1, .LBB54_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB54_4 +; RV64I-NEXT: .LBB54_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB54_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB54_1 +; RV64I-NEXT: .LBB54_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7093,17 +7173,9 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB90_2 ; RV32I-NEXT: .LBB90_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB90_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB90_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB90_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB90_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB90_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -7112,8 +7184,18 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) 
nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB90_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB90_4 +; RV32I-NEXT: .LBB90_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB90_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB90_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB90_1 +; RV32I-NEXT: .LBB90_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7167,17 +7249,9 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB90_2 ; RV64I-NEXT: .LBB90_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB90_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB90_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB90_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB90_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB90_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -7186,8 +7260,18 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB90_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB90_4 +; RV64I-NEXT: .LBB90_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB90_1 +; RV64I-NEXT: # %bb.3: # 
%atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB90_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB90_1 +; RV64I-NEXT: .LBB90_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7245,17 +7329,9 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB91_2 ; RV32I-NEXT: .LBB91_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB91_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB91_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB91_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB91_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB91_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -7264,8 +7340,18 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB91_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB91_4 +; RV32I-NEXT: .LBB91_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB91_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB91_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB91_1 +; RV32I-NEXT: .LBB91_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7319,17 +7405,9 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB91_2 ; RV64I-NEXT: 
.LBB91_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB91_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB91_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB91_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB91_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB91_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -7338,8 +7416,18 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB91_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB91_4 +; RV64I-NEXT: .LBB91_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB91_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB91_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB91_1 +; RV64I-NEXT: .LBB91_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7397,17 +7485,9 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB92_2 ; RV32I-NEXT: .LBB92_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB92_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB92_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB92_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB92_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB92_2 Depth=1 ; 
RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -7416,8 +7496,18 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB92_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB92_4 +; RV32I-NEXT: .LBB92_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB92_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB92_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB92_1 +; RV32I-NEXT: .LBB92_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7471,17 +7561,9 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB92_2 ; RV64I-NEXT: .LBB92_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB92_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB92_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB92_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB92_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB92_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -7490,8 +7572,18 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB92_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB92_4 +; RV64I-NEXT: .LBB92_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; 
RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB92_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB92_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB92_1 +; RV64I-NEXT: .LBB92_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7549,17 +7641,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB93_2 ; RV32I-NEXT: .LBB93_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB93_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB93_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB93_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB93_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB93_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -7568,8 +7652,18 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB93_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB93_4 +; RV32I-NEXT: .LBB93_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB93_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB93_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB93_1 +; RV32I-NEXT: .LBB93_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7623,17 +7717,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) 
nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB93_2 ; RV64I-NEXT: .LBB93_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB93_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB93_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB93_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB93_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB93_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -7642,8 +7728,18 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB93_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB93_4 +; RV64I-NEXT: .LBB93_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB93_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB93_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB93_1 +; RV64I-NEXT: .LBB93_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7701,17 +7797,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB94_2 ; RV32I-NEXT: .LBB94_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt s0, a1, .LBB94_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB94_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; 
RV32I-NEXT: .LBB94_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB94_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB94_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -7720,8 +7808,18 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB94_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB94_4 +; RV32I-NEXT: .LBB94_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a1, .LBB94_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB94_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB94_1 +; RV32I-NEXT: .LBB94_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7775,17 +7873,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB94_2 ; RV64I-NEXT: .LBB94_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB94_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB94_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB94_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB94_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB94_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -7794,8 +7884,18 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB94_1 -; RV64I-NEXT: # %bb.4: # 
%atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB94_4 +; RV64I-NEXT: .LBB94_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB94_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB94_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB94_1 +; RV64I-NEXT: .LBB94_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -7853,17 +7953,9 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB95_2 ; RV32I-NEXT: .LBB95_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB95_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB95_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB95_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB95_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB95_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -7872,8 +7964,18 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB95_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB95_4 +; RV32I-NEXT: .LBB95_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB95_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB95_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB95_1 +; RV32I-NEXT: .LBB95_4: # %atomicrmw.end ; 
RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -7927,17 +8029,9 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB95_2 ; RV64I-NEXT: .LBB95_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB95_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB95_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB95_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB95_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB95_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -7946,8 +8040,18 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB95_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB95_4 +; RV64I-NEXT: .LBB95_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB95_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB95_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB95_1 +; RV64I-NEXT: .LBB95_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8005,17 +8109,9 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB96_2 ; RV32I-NEXT: .LBB96_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, 
a0 -; RV32I-NEXT: bge s0, a1, .LBB96_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB96_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB96_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB96_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB96_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -8024,8 +8120,18 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB96_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB96_4 +; RV32I-NEXT: .LBB96_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB96_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB96_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB96_1 +; RV32I-NEXT: .LBB96_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8079,17 +8185,9 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB96_2 ; RV64I-NEXT: .LBB96_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB96_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB96_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB96_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB96_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB96_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -8098,8 +8196,18 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind { ; 
RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB96_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB96_4 +; RV64I-NEXT: .LBB96_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB96_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB96_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB96_1 +; RV64I-NEXT: .LBB96_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8157,17 +8265,9 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB97_2 ; RV32I-NEXT: .LBB97_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB97_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB97_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB97_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB97_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB97_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -8176,8 +8276,18 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB97_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB97_4 +; RV32I-NEXT: .LBB97_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB97_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start 
+; RV32I-NEXT: # in Loop: Header=BB97_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB97_1 +; RV32I-NEXT: .LBB97_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8231,17 +8341,9 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB97_2 ; RV64I-NEXT: .LBB97_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB97_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB97_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB97_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB97_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB97_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -8250,8 +8352,18 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB97_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB97_4 +; RV64I-NEXT: .LBB97_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB97_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB97_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB97_1 +; RV64I-NEXT: .LBB97_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8309,17 +8421,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB98_2 ; RV32I-NEXT: .LBB98_1: # 
%atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB98_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB98_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB98_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB98_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB98_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -8328,8 +8432,18 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB98_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB98_4 +; RV32I-NEXT: .LBB98_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB98_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB98_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB98_1 +; RV32I-NEXT: .LBB98_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8383,17 +8497,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB98_2 ; RV64I-NEXT: .LBB98_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB98_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB98_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB98_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB98_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB98_2 Depth=1 ; RV64I-NEXT: sh 
a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -8402,8 +8508,18 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB98_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB98_4 +; RV64I-NEXT: .LBB98_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB98_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB98_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB98_1 +; RV64I-NEXT: .LBB98_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8461,17 +8577,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: srai s0, a1, 16 ; RV32I-NEXT: addi s3, sp, 10 +; RV32I-NEXT: j .LBB99_2 ; RV32I-NEXT: .LBB99_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a1, a0, 16 -; RV32I-NEXT: srai a1, a1, 16 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bge s0, a1, .LBB99_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB99_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB99_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB99_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB99_2 Depth=1 ; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -8480,8 +8588,18 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 10(sp) -; RV32I-NEXT: beqz a1, .LBB99_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB99_4 +; RV32I-NEXT: .LBB99_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: 
slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a1, .LBB99_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB99_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB99_1 +; RV32I-NEXT: .LBB99_4: # %atomicrmw.end ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -8535,17 +8653,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: slli a1, a1, 48 ; RV64I-NEXT: srai s0, a1, 48 ; RV64I-NEXT: addi s3, sp, 6 +; RV64I-NEXT: j .LBB99_2 ; RV64I-NEXT: .LBB99_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a1, a0, 48 -; RV64I-NEXT: srai a1, a1, 48 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB99_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB99_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB99_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB99_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB99_2 Depth=1 ; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -8554,8 +8664,18 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 6(sp) -; RV64I-NEXT: beqz a1, .LBB99_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB99_4 +; RV64I-NEXT: .LBB99_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB99_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB99_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB99_1 +; RV64I-NEXT: .LBB99_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -8615,16 +8735,9 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { 
; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB100_2 ; RV32I-NEXT: .LBB100_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB100_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB100_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB100_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB100_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB100_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -8633,8 +8746,17 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB100_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB100_4 +; RV32I-NEXT: .LBB100_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s1, a1, .LBB100_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB100_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB100_1 +; RV32I-NEXT: .LBB100_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -8686,16 +8808,9 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB100_2 ; RV64I-NEXT: .LBB100_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB100_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB100_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB100_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB100_1 
Depth=1 +; RV64I-NEXT: # in Loop: Header=BB100_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -8704,8 +8819,17 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB100_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB100_4 +; RV64I-NEXT: .LBB100_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s1, a1, .LBB100_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB100_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB100_1 +; RV64I-NEXT: .LBB100_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -8761,16 +8885,9 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB101_2 ; RV32I-NEXT: .LBB101_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB101_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB101_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB101_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB101_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB101_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -8779,8 +8896,17 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB101_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB101_4 +; RV32I-NEXT: .LBB101_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop 
Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s1, a1, .LBB101_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB101_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB101_1 +; RV32I-NEXT: .LBB101_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -8832,16 +8958,9 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB101_2 ; RV64I-NEXT: .LBB101_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB101_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB101_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB101_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB101_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB101_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -8850,8 +8969,17 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB101_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB101_4 +; RV64I-NEXT: .LBB101_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s1, a1, .LBB101_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB101_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB101_1 +; RV64I-NEXT: .LBB101_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -8907,16 +9035,9 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; 
RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB102_2 ; RV32I-NEXT: .LBB102_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB102_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB102_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB102_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB102_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB102_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -8925,8 +9046,17 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB102_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB102_4 +; RV32I-NEXT: .LBB102_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s1, a1, .LBB102_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB102_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB102_1 +; RV32I-NEXT: .LBB102_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -8978,16 +9108,9 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB102_2 ; RV64I-NEXT: .LBB102_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB102_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB102_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB102_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB102_1 Depth=1 +; RV64I-NEXT: # in Loop: 
Header=BB102_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -8996,8 +9119,17 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB102_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB102_4 +; RV64I-NEXT: .LBB102_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s1, a1, .LBB102_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB102_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB102_1 +; RV64I-NEXT: .LBB102_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9053,16 +9185,9 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB103_2 ; RV32I-NEXT: .LBB103_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB103_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB103_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB103_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB103_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB103_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9071,8 +9196,17 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB103_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB103_4 +; RV32I-NEXT: .LBB103_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and 
a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s1, a1, .LBB103_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB103_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB103_1 +; RV32I-NEXT: .LBB103_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9124,16 +9258,9 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB103_2 ; RV64I-NEXT: .LBB103_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB103_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB103_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB103_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB103_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB103_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -9142,8 +9269,17 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB103_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB103_4 +; RV64I-NEXT: .LBB103_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s1, a1, .LBB103_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB103_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB103_1 +; RV64I-NEXT: .LBB103_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9199,16 +9335,9 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: 
addi s3, sp, 6 +; RV32I-NEXT: j .LBB104_2 ; RV32I-NEXT: .LBB104_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu s1, a1, .LBB104_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB104_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB104_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB104_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB104_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9217,8 +9346,17 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB104_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB104_4 +; RV32I-NEXT: .LBB104_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s1, a1, .LBB104_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB104_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB104_1 +; RV32I-NEXT: .LBB104_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9270,16 +9408,9 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB104_2 ; RV64I-NEXT: .LBB104_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s1, a1, .LBB104_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB104_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB104_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB104_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB104_2 Depth=1 ; RV64I-NEXT: sh a0, 
14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -9288,8 +9419,17 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB104_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB104_4 +; RV64I-NEXT: .LBB104_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s1, a1, .LBB104_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB104_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB104_1 +; RV64I-NEXT: .LBB104_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9345,16 +9485,9 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB105_2 ; RV32I-NEXT: .LBB105_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB105_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB105_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB105_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB105_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB105_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9363,8 +9496,17 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB105_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB105_4 +; RV32I-NEXT: .LBB105_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; 
RV32I-NEXT: bgeu s1, a1, .LBB105_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB105_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB105_1 +; RV32I-NEXT: .LBB105_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9416,16 +9558,9 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB105_2 ; RV64I-NEXT: .LBB105_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB105_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB105_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB105_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB105_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB105_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -9434,8 +9569,17 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB105_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB105_4 +; RV64I-NEXT: .LBB105_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s1, a1, .LBB105_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB105_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB105_1 +; RV64I-NEXT: .LBB105_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9491,16 +9635,9 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j 
.LBB106_2 ; RV32I-NEXT: .LBB106_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB106_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB106_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB106_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB106_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB106_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9509,8 +9646,17 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB106_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB106_4 +; RV32I-NEXT: .LBB106_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s1, a1, .LBB106_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB106_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB106_1 +; RV32I-NEXT: .LBB106_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9562,16 +9708,9 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB106_2 ; RV64I-NEXT: .LBB106_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB106_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB106_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB106_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB106_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB106_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; 
RV64I-NEXT: mv a1, s3 @@ -9580,8 +9719,17 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB106_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB106_4 +; RV64I-NEXT: .LBB106_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s1, a1, .LBB106_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB106_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB106_1 +; RV64I-NEXT: .LBB106_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9637,16 +9785,9 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB107_2 ; RV32I-NEXT: .LBB107_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB107_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB107_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB107_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB107_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB107_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9655,8 +9796,17 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB107_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB107_4 +; RV32I-NEXT: .LBB107_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s1, a1, .LBB107_1 +; 
RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB107_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB107_1 +; RV32I-NEXT: .LBB107_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9708,16 +9858,9 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB107_2 ; RV64I-NEXT: .LBB107_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB107_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB107_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB107_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB107_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB107_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -9726,8 +9869,17 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB107_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB107_4 +; RV64I-NEXT: .LBB107_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s1, a1, .LBB107_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB107_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB107_1 +; RV64I-NEXT: .LBB107_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9783,16 +9935,9 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB108_2 ; RV32I-NEXT: .LBB108_1: # 
%atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB108_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB108_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB108_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB108_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB108_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9801,8 +9946,17 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB108_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB108_4 +; RV32I-NEXT: .LBB108_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s1, a1, .LBB108_1 +; RV32I-NEXT: # %bb.3: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB108_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB108_1 +; RV32I-NEXT: .LBB108_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -9854,16 +10008,9 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB108_2 ; RV64I-NEXT: .LBB108_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB108_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB108_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB108_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB108_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB108_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -9872,8 
+10019,17 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB108_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB108_4 +; RV64I-NEXT: .LBB108_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s1, a1, .LBB108_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB108_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB108_1 +; RV64I-NEXT: .LBB108_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -9929,16 +10085,9 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: addi s0, a1, -1 ; RV32I-NEXT: and s1, s2, s0 ; RV32I-NEXT: addi s3, sp, 6 +; RV32I-NEXT: j .LBB109_2 ; RV32I-NEXT: .LBB109_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a1, a0, s0 -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bgeu s1, a1, .LBB109_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB109_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB109_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB109_1 Depth=1 +; RV32I-NEXT: # in Loop: Header=BB109_2 Depth=1 ; RV32I-NEXT: sh a0, 6(sp) ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: mv a1, s3 @@ -9947,8 +10096,17 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_2 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: lh a0, 6(sp) -; RV32I-NEXT: beqz a1, .LBB109_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a1, .LBB109_4 +; RV32I-NEXT: .LBB109_2: # %atomicrmw.start +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32I-NEXT: and a1, a0, s0 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s1, a1, .LBB109_1 +; RV32I-NEXT: # %bb.3: # 
%atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB109_2 Depth=1 +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: j .LBB109_1 +; RV32I-NEXT: .LBB109_4: # %atomicrmw.end ; RV32I-NEXT: lw s4, 8(sp) ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -10000,16 +10158,9 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: addiw s0, a1, -1 ; RV64I-NEXT: and s1, s2, s0 ; RV64I-NEXT: addi s3, sp, 14 +; RV64I-NEXT: j .LBB109_2 ; RV64I-NEXT: .LBB109_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a1, a0, s0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s1, a1, .LBB109_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB109_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB109_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB109_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB109_2 Depth=1 ; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: mv a1, s3 @@ -10018,8 +10169,17 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_2 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lh a0, 14(sp) -; RV64I-NEXT: beqz a1, .LBB109_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB109_4 +; RV64I-NEXT: .LBB109_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: and a1, a0, s0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s1, a1, .LBB109_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB109_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB109_1 +; RV64I-NEXT: .LBB109_4: # %atomicrmw.end ; RV64I-NEXT: ld s4, 16(sp) ; RV64I-NEXT: ld s3, 24(sp) ; RV64I-NEXT: ld s2, 32(sp) @@ -11340,23 +11500,25 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB145_3 ; RV32I-NEXT: 
.LBB145_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB145_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB145_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB145_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB145_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB145_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB145_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB145_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB145_1 +; RV32I-NEXT: .LBB145_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB145_1 +; RV32I-NEXT: .LBB145_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11383,16 +11545,9 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB145_2 ; RV64I-NEXT: .LBB145_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB145_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB145_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB145_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB145_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB145_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11401,8 +11556,17 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB145_1 -; RV64I-NEXT: # %bb.4: # 
%atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB145_4 +; RV64I-NEXT: .LBB145_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB145_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB145_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB145_1 +; RV64I-NEXT: .LBB145_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11431,23 +11595,25 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB146_3 ; RV32I-NEXT: .LBB146_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB146_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB146_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB146_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB146_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB146_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB146_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB146_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB146_1 +; RV32I-NEXT: .LBB146_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB146_1 +; RV32I-NEXT: .LBB146_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11474,16 +11640,9 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB146_2 ; 
RV64I-NEXT: .LBB146_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB146_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB146_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB146_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB146_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB146_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11492,8 +11651,17 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB146_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB146_4 +; RV64I-NEXT: .LBB146_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB146_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB146_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB146_1 +; RV64I-NEXT: .LBB146_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11522,23 +11690,25 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB147_3 ; RV32I-NEXT: .LBB147_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB147_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB147_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB147_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB147_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; 
RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB147_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB147_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB147_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB147_1 +; RV32I-NEXT: .LBB147_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB147_1 +; RV32I-NEXT: .LBB147_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11565,16 +11735,9 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB147_2 ; RV64I-NEXT: .LBB147_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB147_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB147_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB147_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB147_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB147_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11583,8 +11746,17 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB147_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB147_4 +; RV64I-NEXT: .LBB147_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB147_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB147_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB147_1 +; RV64I-NEXT: .LBB147_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld 
s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11613,23 +11785,25 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB148_3 ; RV32I-NEXT: .LBB148_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB148_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB148_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB148_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB148_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB148_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB148_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB148_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB148_1 +; RV32I-NEXT: .LBB148_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB148_1 +; RV32I-NEXT: .LBB148_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11656,16 +11830,9 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB148_2 ; RV64I-NEXT: .LBB148_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB148_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB148_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB148_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB148_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB148_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; 
RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11674,8 +11841,17 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB148_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB148_4 +; RV64I-NEXT: .LBB148_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB148_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB148_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB148_1 +; RV64I-NEXT: .LBB148_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11704,23 +11880,25 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB149_3 ; RV32I-NEXT: .LBB149_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: blt s0, a2, .LBB149_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB149_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB149_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB149_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB149_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB149_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB149_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB149_1 +; RV32I-NEXT: .LBB149_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB149_1 +; RV32I-NEXT: .LBB149_4: # 
%atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11747,16 +11925,9 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB149_2 ; RV64I-NEXT: .LBB149_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt s0, a1, .LBB149_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB149_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB149_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB149_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB149_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11765,8 +11936,17 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB149_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB149_4 +; RV64I-NEXT: .LBB149_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a1, .LBB149_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB149_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB149_1 +; RV64I-NEXT: .LBB149_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11795,23 +11975,25 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB150_3 ; RV32I-NEXT: .LBB150_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB150_3 -; RV32I-NEXT: # %bb.2: 
# %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB150_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB150_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB150_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB150_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB150_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB150_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB150_1 +; RV32I-NEXT: .LBB150_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB150_1 +; RV32I-NEXT: .LBB150_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11838,16 +12020,9 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB150_2 ; RV64I-NEXT: .LBB150_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB150_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB150_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB150_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB150_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB150_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11856,8 +12031,17 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB150_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB150_4 +; RV64I-NEXT: .LBB150_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 
+; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB150_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB150_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB150_1 +; RV64I-NEXT: .LBB150_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11886,23 +12070,25 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB151_3 ; RV32I-NEXT: .LBB151_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB151_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB151_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB151_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB151_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB151_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB151_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB151_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB151_1 +; RV32I-NEXT: .LBB151_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB151_1 +; RV32I-NEXT: .LBB151_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -11929,16 +12115,9 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB151_2 ; RV64I-NEXT: .LBB151_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, 
.LBB151_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB151_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB151_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB151_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB151_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -11947,8 +12126,17 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB151_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB151_4 +; RV64I-NEXT: .LBB151_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB151_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB151_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB151_1 +; RV64I-NEXT: .LBB151_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -11977,23 +12165,25 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB152_3 ; RV32I-NEXT: .LBB152_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB152_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB152_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB152_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB152_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB152_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB152_4 
+; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB152_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB152_1 +; RV32I-NEXT: .LBB152_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB152_1 +; RV32I-NEXT: .LBB152_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12020,16 +12210,9 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB152_2 ; RV64I-NEXT: .LBB152_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB152_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB152_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB152_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB152_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB152_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12038,8 +12221,17 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB152_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB152_4 +; RV64I-NEXT: .LBB152_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB152_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB152_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB152_1 +; RV64I-NEXT: .LBB152_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12068,23 +12260,25 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) 
; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB153_3 ; RV32I-NEXT: .LBB153_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB153_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB153_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB153_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB153_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB153_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB153_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB153_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB153_1 +; RV32I-NEXT: .LBB153_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB153_1 +; RV32I-NEXT: .LBB153_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12111,16 +12305,9 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB153_2 ; RV64I-NEXT: .LBB153_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB153_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB153_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB153_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB153_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB153_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12129,8 +12316,17 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call 
__atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB153_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB153_4 +; RV64I-NEXT: .LBB153_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB153_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB153_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB153_1 +; RV64I-NEXT: .LBB153_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12159,23 +12355,25 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: blt s0, a2, .LBB154_3 ; RV32I-NEXT: .LBB154_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bge s0, a2, .LBB154_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB154_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB154_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB154_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB154_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB154_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB154_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bge s0, a2, .LBB154_1 +; RV32I-NEXT: .LBB154_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB154_1 +; RV32I-NEXT: .LBB154_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12202,16 +12400,9 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, 
i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB154_2 ; RV64I-NEXT: .LBB154_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bge s0, a1, .LBB154_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB154_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB154_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB154_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB154_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12220,8 +12411,17 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB154_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB154_4 +; RV64I-NEXT: .LBB154_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a1, .LBB154_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB154_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB154_1 +; RV64I-NEXT: .LBB154_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12250,23 +12450,25 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB155_3 ; RV32I-NEXT: .LBB155_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB155_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB155_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB155_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: 
Header=BB155_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB155_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB155_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB155_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB155_1 +; RV32I-NEXT: .LBB155_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB155_1 +; RV32I-NEXT: .LBB155_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12293,16 +12495,9 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB155_2 ; RV64I-NEXT: .LBB155_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB155_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB155_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB155_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB155_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB155_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12311,8 +12506,17 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB155_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB155_4 +; RV64I-NEXT: .LBB155_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB155_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB155_2 Depth=1 +; 
RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB155_1 +; RV64I-NEXT: .LBB155_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12341,23 +12545,25 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB156_3 ; RV32I-NEXT: .LBB156_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB156_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB156_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB156_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB156_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB156_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB156_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB156_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB156_1 +; RV32I-NEXT: .LBB156_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB156_1 +; RV32I-NEXT: .LBB156_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12384,16 +12590,9 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB156_2 ; RV64I-NEXT: .LBB156_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB156_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB156_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB156_3: # 
%atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB156_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB156_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12402,8 +12601,17 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB156_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB156_4 +; RV64I-NEXT: .LBB156_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB156_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB156_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB156_1 +; RV64I-NEXT: .LBB156_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12432,23 +12640,25 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB157_3 ; RV32I-NEXT: .LBB157_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB157_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB157_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB157_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB157_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB157_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB157_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB157_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, 
.LBB157_1 +; RV32I-NEXT: .LBB157_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB157_1 +; RV32I-NEXT: .LBB157_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12475,16 +12685,9 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB157_2 ; RV64I-NEXT: .LBB157_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB157_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB157_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB157_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB157_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB157_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12493,8 +12696,17 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB157_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB157_4 +; RV64I-NEXT: .LBB157_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB157_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB157_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB157_1 +; RV64I-NEXT: .LBB157_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12523,23 +12735,25 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB158_3 ; RV32I-NEXT: .LBB158_1: # %atomicrmw.start ; 
RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB158_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB158_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB158_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB158_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB158_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB158_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB158_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB158_1 +; RV32I-NEXT: .LBB158_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB158_1 +; RV32I-NEXT: .LBB158_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12566,16 +12780,9 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB158_2 ; RV64I-NEXT: .LBB158_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB158_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB158_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB158_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB158_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB158_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12584,8 +12791,17 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB158_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; 
RV64I-NEXT: bnez a1, .LBB158_4 +; RV64I-NEXT: .LBB158_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB158_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB158_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB158_1 +; RV64I-NEXT: .LBB158_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12614,23 +12830,25 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB159_3 ; RV32I-NEXT: .LBB159_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bltu s0, a2, .LBB159_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB159_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB159_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB159_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB159_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB159_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB159_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB159_1 +; RV32I-NEXT: .LBB159_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB159_1 +; RV32I-NEXT: .LBB159_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12657,16 +12875,9 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB159_2 ; RV64I-NEXT: 
.LBB159_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu s0, a1, .LBB159_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB159_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB159_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB159_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB159_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12675,8 +12886,17 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB159_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB159_4 +; RV64I-NEXT: .LBB159_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a1, .LBB159_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB159_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB159_1 +; RV64I-NEXT: .LBB159_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12705,23 +12925,25 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB160_3 ; RV32I-NEXT: .LBB160_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB160_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB160_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB160_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB160_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: mv a3, zero ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call 
__atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB160_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB160_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB160_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB160_1 +; RV32I-NEXT: .LBB160_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB160_1 +; RV32I-NEXT: .LBB160_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12748,16 +12970,9 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB160_2 ; RV64I-NEXT: .LBB160_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB160_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB160_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB160_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB160_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB160_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12766,8 +12981,17 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB160_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB160_4 +; RV64I-NEXT: .LBB160_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB160_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB160_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB160_1 +; RV64I-NEXT: .LBB160_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 
16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12796,23 +13020,25 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB161_3 ; RV32I-NEXT: .LBB161_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB161_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB161_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB161_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB161_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 2 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB161_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB161_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB161_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB161_1 +; RV32I-NEXT: .LBB161_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB161_1 +; RV32I-NEXT: .LBB161_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12839,16 +13065,9 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB161_2 ; RV64I-NEXT: .LBB161_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB161_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB161_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB161_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB161_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB161_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) 
; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12857,8 +13076,17 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB161_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB161_4 +; RV64I-NEXT: .LBB161_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB161_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB161_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB161_1 +; RV64I-NEXT: .LBB161_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12887,23 +13115,25 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB162_3 ; RV32I-NEXT: .LBB162_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB162_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB162_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB162_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB162_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 3 ; RV32I-NEXT: mv a4, zero ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB162_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB162_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB162_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB162_1 +; RV32I-NEXT: .LBB162_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB162_1 +; RV32I-NEXT: .LBB162_4: # 
%atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -12930,16 +13160,9 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB162_2 ; RV64I-NEXT: .LBB162_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB162_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB162_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB162_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB162_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB162_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -12948,8 +13171,17 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB162_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB162_4 +; RV64I-NEXT: .LBB162_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB162_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB162_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB162_1 +; RV64I-NEXT: .LBB162_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -12978,23 +13210,25 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB163_3 ; RV32I-NEXT: .LBB163_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB163_3 -; RV32I-NEXT: # 
%bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB163_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB163_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB163_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 4 ; RV32I-NEXT: addi a4, zero, 2 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB163_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB163_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB163_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB163_1 +; RV32I-NEXT: .LBB163_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB163_1 +; RV32I-NEXT: .LBB163_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -13021,16 +13255,9 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB163_2 ; RV64I-NEXT: .LBB163_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bgeu s0, a1, .LBB163_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB163_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB163_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB163_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB163_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -13039,8 +13266,17 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB163_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB163_4 +; RV64I-NEXT: .LBB163_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; 
RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB163_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB163_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB163_1 +; RV64I-NEXT: .LBB163_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -13069,23 +13305,25 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: addi s2, sp, 12 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bltu s0, a2, .LBB164_3 ; RV32I-NEXT: .LBB164_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: bgeu s0, a2, .LBB164_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB164_1 Depth=1 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: .LBB164_3: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB164_1 Depth=1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: addi a3, zero, 5 ; RV32I-NEXT: addi a4, zero, 5 ; RV32I-NEXT: call __atomic_compare_exchange_4 ; RV32I-NEXT: lw a2, 12(sp) -; RV32I-NEXT: beqz a0, .LBB164_1 -; RV32I-NEXT: # %bb.4: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB164_4 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: # in Loop: Header=BB164_1 Depth=1 +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: bgeu s0, a2, .LBB164_1 +; RV32I-NEXT: .LBB164_3: # %atomicrmw.start +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: j .LBB164_1 +; RV32I-NEXT: .LBB164_4: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s2, 16(sp) ; RV32I-NEXT: lw s1, 20(sp) @@ -13112,16 +13350,9 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: lwu a0, 0(a0) ; RV64I-NEXT: sext.w s0, a1 ; RV64I-NEXT: addi s3, sp, 4 +; RV64I-NEXT: j .LBB164_2 ; RV64I-NEXT: .LBB164_1: # %atomicrmw.start -; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sext.w a1, a0 -; RV64I-NEXT: mv a2, a0 
-; RV64I-NEXT: bgeu s0, a1, .LBB164_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB164_1 Depth=1 -; RV64I-NEXT: mv a2, s2 -; RV64I-NEXT: .LBB164_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB164_1 Depth=1 +; RV64I-NEXT: # in Loop: Header=BB164_2 Depth=1 ; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s3 @@ -13130,8 +13361,17 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind { ; RV64I-NEXT: call __atomic_compare_exchange_4 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: lw a0, 4(sp) -; RV64I-NEXT: beqz a1, .LBB164_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a1, .LBB164_4 +; RV64I-NEXT: .LBB164_2: # %atomicrmw.start +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: sext.w a1, a0 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a1, .LBB164_1 +; RV64I-NEXT: # %bb.3: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB164_2 Depth=1 +; RV64I-NEXT: mv a2, s2 +; RV64I-NEXT: j .LBB164_1 +; RV64I-NEXT: .LBB164_4: # %atomicrmw.end ; RV64I-NEXT: ld s3, 8(sp) ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -14568,26 +14808,9 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB200_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB200_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB200_3 ; RV32I-NEXT: j .LBB200_4 -; RV32I-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB200_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB200_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: 
mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB200_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV32I-NEXT: .LBB200_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -14596,8 +14819,23 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB200_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB200_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB200_4 +; RV32I-NEXT: .LBB200_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB200_5 +; RV32I-NEXT: .LBB200_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB200_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB200_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB200_1 +; RV32I-NEXT: .LBB200_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -14621,26 +14859,9 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB200_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB200_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB200_3 ; RV32IA-NEXT: j .LBB200_4 -; RV32IA-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB200_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB200_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; 
RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB200_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV32IA-NEXT: .LBB200_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -14649,8 +14870,23 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB200_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB200_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB200_4 +; RV32IA-NEXT: .LBB200_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB200_5 +; RV32IA-NEXT: .LBB200_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB200_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB200_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB200_1 +; RV32IA-NEXT: .LBB200_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -14671,23 +14907,25 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB200_3 ; RV64I-NEXT: .LBB200_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB200_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB200_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call 
__atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB200_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB200_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB200_1 +; RV64I-NEXT: .LBB200_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB200_1 +; RV64I-NEXT: .LBB200_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -14719,26 +14957,9 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB201_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB201_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB201_3 ; RV32I-NEXT: j .LBB201_4 -; RV32I-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB201_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB201_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB201_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV32I-NEXT: .LBB201_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -14747,8 +14968,23 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB201_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB201_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start 
+; RV32I-NEXT: beq a1, s0, .LBB201_4 +; RV32I-NEXT: .LBB201_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB201_5 +; RV32I-NEXT: .LBB201_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB201_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB201_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB201_1 +; RV32I-NEXT: .LBB201_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -14772,26 +15008,9 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB201_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB201_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB201_3 ; RV32IA-NEXT: j .LBB201_4 -; RV32IA-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB201_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB201_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB201_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV32IA-NEXT: .LBB201_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -14800,8 +15019,23 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB201_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; 
RV32IA-NEXT: bnez a0, .LBB201_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB201_4 +; RV32IA-NEXT: .LBB201_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB201_5 +; RV32IA-NEXT: .LBB201_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB201_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB201_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB201_1 +; RV32IA-NEXT: .LBB201_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -14822,23 +15056,25 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB201_3 ; RV64I-NEXT: .LBB201_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB201_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB201_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB201_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB201_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB201_1 +; RV64I-NEXT: .LBB201_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB201_1 +; RV64I-NEXT: .LBB201_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -14870,26 +15106,9 @@ define i64 
@atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB202_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB202_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB202_3 ; RV32I-NEXT: j .LBB202_4 -; RV32I-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB202_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB202_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB202_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV32I-NEXT: .LBB202_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -14898,8 +15117,23 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB202_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB202_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB202_4 +; RV32I-NEXT: .LBB202_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB202_5 +; RV32I-NEXT: .LBB202_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB202_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB202_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB202_1 +; RV32I-NEXT: .LBB202_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ 
-14923,26 +15157,9 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB202_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB202_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB202_3 ; RV32IA-NEXT: j .LBB202_4 -; RV32IA-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB202_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB202_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB202_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV32IA-NEXT: .LBB202_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -14951,8 +15168,23 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB202_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB202_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB202_4 +; RV32IA-NEXT: .LBB202_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB202_5 +; RV32IA-NEXT: .LBB202_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB202_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB202_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB202_1 +; RV32IA-NEXT: .LBB202_7: # %atomicrmw.end ; 
RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -14973,23 +15205,25 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB202_3 ; RV64I-NEXT: .LBB202_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB202_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB202_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB202_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB202_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB202_1 +; RV64I-NEXT: .LBB202_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB202_1 +; RV64I-NEXT: .LBB202_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15021,26 +15255,9 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB203_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB203_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB203_3 ; RV32I-NEXT: j .LBB203_4 -; RV32I-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB203_4: # %atomicrmw.start -; RV32I-NEXT: # in 
Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB203_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB203_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV32I-NEXT: .LBB203_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15049,8 +15266,23 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB203_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB203_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB203_4 +; RV32I-NEXT: .LBB203_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB203_5 +; RV32I-NEXT: .LBB203_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB203_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB203_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB203_1 +; RV32I-NEXT: .LBB203_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15074,26 +15306,9 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB203_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB203_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB203_3 ; RV32IA-NEXT: j .LBB203_4 -; RV32IA-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: 
.LBB203_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB203_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB203_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV32IA-NEXT: .LBB203_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15102,8 +15317,23 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB203_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB203_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB203_4 +; RV32IA-NEXT: .LBB203_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB203_5 +; RV32IA-NEXT: .LBB203_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB203_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB203_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB203_1 +; RV32IA-NEXT: .LBB203_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15124,23 +15354,25 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB203_3 ; RV64I-NEXT: .LBB203_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB203_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; 
RV64I-NEXT: .LBB203_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB203_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB203_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB203_1 +; RV64I-NEXT: .LBB203_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB203_1 +; RV64I-NEXT: .LBB203_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15172,26 +15404,9 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB204_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB204_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB204_3 ; RV32I-NEXT: j .LBB204_4 -; RV32I-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB204_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB204_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB204_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV32I-NEXT: .LBB204_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15200,8 +15415,23 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call 
__atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB204_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB204_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB204_4 +; RV32I-NEXT: .LBB204_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB204_5 +; RV32I-NEXT: .LBB204_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB204_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB204_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB204_1 +; RV32I-NEXT: .LBB204_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15225,26 +15455,9 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB204_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB204_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB204_3 ; RV32IA-NEXT: j .LBB204_4 -; RV32IA-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB204_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB204_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB204_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV32IA-NEXT: .LBB204_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15253,8 +15466,23 @@ define i64 
@atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB204_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB204_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB204_4 +; RV32IA-NEXT: .LBB204_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB204_5 +; RV32IA-NEXT: .LBB204_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB204_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB204_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB204_1 +; RV32IA-NEXT: .LBB204_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15275,23 +15503,25 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB204_3 ; RV64I-NEXT: .LBB204_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: blt s0, a2, .LBB204_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB204_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB204_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB204_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB204_1 +; RV64I-NEXT: .LBB204_3: # 
%atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB204_1 +; RV64I-NEXT: .LBB204_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15323,27 +15553,9 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB205_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB205_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB205_3 ; RV32I-NEXT: j .LBB205_4 -; RV32I-NEXT: .LBB205_3: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB205_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB205_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB205_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32I-NEXT: .LBB205_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15352,8 +15564,24 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB205_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB205_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB205_4 +; RV32I-NEXT: .LBB205_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB205_5 +; RV32I-NEXT: .LBB205_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB205_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: 
mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB205_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB205_1 +; RV32I-NEXT: .LBB205_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15377,27 +15605,9 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB205_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB205_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB205_3 ; RV32IA-NEXT: j .LBB205_4 -; RV32IA-NEXT: .LBB205_3: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB205_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB205_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB205_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32IA-NEXT: .LBB205_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15406,8 +15616,24 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB205_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB205_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB205_4 +; RV32IA-NEXT: .LBB205_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB205_5 +; RV32IA-NEXT: .LBB205_4: +; 
RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB205_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB205_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB205_1 +; RV32IA-NEXT: .LBB205_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15428,23 +15654,25 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB205_3 ; RV64I-NEXT: .LBB205_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB205_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB205_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB205_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB205_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB205_1 +; RV64I-NEXT: .LBB205_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB205_1 +; RV64I-NEXT: .LBB205_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15476,27 +15704,9 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB206_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq 
a1, s0, .LBB206_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB206_3 ; RV32I-NEXT: j .LBB206_4 -; RV32I-NEXT: .LBB206_3: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB206_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB206_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB206_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32I-NEXT: .LBB206_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15505,8 +15715,24 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB206_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB206_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB206_4 +; RV32I-NEXT: .LBB206_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB206_5 +; RV32I-NEXT: .LBB206_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB206_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB206_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB206_1 +; RV32I-NEXT: .LBB206_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15530,27 +15756,9 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: 
.LBB206_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB206_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB206_3 ; RV32IA-NEXT: j .LBB206_4 -; RV32IA-NEXT: .LBB206_3: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB206_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB206_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB206_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32IA-NEXT: .LBB206_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15559,8 +15767,24 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB206_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB206_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB206_4 +; RV32IA-NEXT: .LBB206_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB206_5 +; RV32IA-NEXT: .LBB206_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB206_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB206_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB206_1 +; RV32IA-NEXT: .LBB206_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15581,23 +15805,25 @@ define i64 
@atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB206_3 ; RV64I-NEXT: .LBB206_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB206_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB206_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB206_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB206_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB206_1 +; RV64I-NEXT: .LBB206_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB206_1 +; RV64I-NEXT: .LBB206_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15629,27 +15855,9 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB207_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB207_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB207_3 ; RV32I-NEXT: j .LBB207_4 -; RV32I-NEXT: .LBB207_3: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB207_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; 
RV32I-NEXT: bnez a0, .LBB207_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB207_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32I-NEXT: .LBB207_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15658,8 +15866,24 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB207_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB207_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB207_4 +; RV32I-NEXT: .LBB207_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB207_5 +; RV32I-NEXT: .LBB207_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB207_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB207_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB207_1 +; RV32I-NEXT: .LBB207_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15683,27 +15907,9 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB207_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB207_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB207_3 ; RV32IA-NEXT: j .LBB207_4 -; RV32IA-NEXT: .LBB207_3: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB207_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: 
Header=BB207_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB207_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB207_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32IA-NEXT: .LBB207_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15712,8 +15918,24 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB207_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB207_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB207_4 +; RV32IA-NEXT: .LBB207_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB207_5 +; RV32IA-NEXT: .LBB207_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB207_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB207_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB207_1 +; RV32IA-NEXT: .LBB207_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15734,23 +15956,25 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB207_3 ; RV64I-NEXT: .LBB207_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB207_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; 
RV64I-NEXT: .LBB207_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB207_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB207_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB207_1 +; RV64I-NEXT: .LBB207_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB207_1 +; RV64I-NEXT: .LBB207_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15782,27 +16006,9 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB208_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB208_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB208_3 ; RV32I-NEXT: j .LBB208_4 -; RV32I-NEXT: .LBB208_3: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB208_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB208_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB208_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32I-NEXT: .LBB208_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15811,8 +16017,24 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; 
RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB208_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB208_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB208_4 +; RV32I-NEXT: .LBB208_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB208_5 +; RV32I-NEXT: .LBB208_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB208_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB208_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB208_1 +; RV32I-NEXT: .LBB208_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15836,27 +16058,9 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB208_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB208_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB208_3 ; RV32IA-NEXT: j .LBB208_4 -; RV32IA-NEXT: .LBB208_3: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB208_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB208_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB208_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32IA-NEXT: .LBB208_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, 
s1 ; RV32IA-NEXT: mv a1, s3 @@ -15865,8 +16069,24 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB208_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB208_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB208_4 +; RV32IA-NEXT: .LBB208_3: # %atomicrmw.start +; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB208_5 +; RV32IA-NEXT: .LBB208_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB208_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB208_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB208_1 +; RV32IA-NEXT: .LBB208_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15887,23 +16107,25 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB208_3 ; RV64I-NEXT: .LBB208_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB208_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB208_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB208_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB208_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB208_1 Depth=1 +; 
RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB208_1 +; RV64I-NEXT: .LBB208_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB208_1 +; RV64I-NEXT: .LBB208_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -15935,27 +16157,9 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB209_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB209_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB209_3 ; RV32I-NEXT: j .LBB209_4 -; RV32I-NEXT: .LBB209_3: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB209_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB209_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB209_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32I-NEXT: .LBB209_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15964,8 +16168,24 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB209_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB209_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB209_4 +; RV32I-NEXT: .LBB209_3: # %atomicrmw.start +; RV32I-NEXT: slt a0, s0, a1 +; RV32I-NEXT: j .LBB209_5 +; RV32I-NEXT: .LBB209_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB209_5: # 
%atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB209_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB209_1 +; RV32I-NEXT: .LBB209_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15989,27 +16209,9 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB209_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB209_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB209_3 ; RV32IA-NEXT: j .LBB209_4 -; RV32IA-NEXT: .LBB209_3: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB209_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB209_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB209_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32IA-NEXT: .LBB209_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16018,8 +16220,24 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB209_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB209_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB209_4 +; RV32IA-NEXT: .LBB209_3: # %atomicrmw.start +; 
RV32IA-NEXT: slt a0, s0, a1 +; RV32IA-NEXT: j .LBB209_5 +; RV32IA-NEXT: .LBB209_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB209_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB209_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB209_1 +; RV32IA-NEXT: .LBB209_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16040,23 +16258,25 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: blt s0, a2, .LBB209_3 ; RV64I-NEXT: .LBB209_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bge s0, a2, .LBB209_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB209_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB209_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB209_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bge s0, a2, .LBB209_1 +; RV64I-NEXT: .LBB209_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB209_1 +; RV64I-NEXT: .LBB209_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16088,26 +16308,9 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: 
.LBB210_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB210_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB210_3 ; RV32I-NEXT: j .LBB210_4 -; RV32I-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB210_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB210_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB210_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV32I-NEXT: .LBB210_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16116,8 +16319,23 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB210_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB210_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB210_4 +; RV32I-NEXT: .LBB210_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB210_5 +; RV32I-NEXT: .LBB210_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB210_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB210_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB210_1 +; RV32I-NEXT: .LBB210_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16141,26 +16359,9 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 
0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB210_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB210_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB210_3 ; RV32IA-NEXT: j .LBB210_4 -; RV32IA-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB210_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB210_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB210_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV32IA-NEXT: .LBB210_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16169,8 +16370,23 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB210_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB210_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB210_4 +; RV32IA-NEXT: .LBB210_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB210_5 +; RV32IA-NEXT: .LBB210_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB210_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB210_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB210_1 +; RV32IA-NEXT: .LBB210_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16191,23 +16407,25 @@ define i64 
@atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB210_3 ; RV64I-NEXT: .LBB210_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB210_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB210_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB210_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB210_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB210_1 +; RV64I-NEXT: .LBB210_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB210_1 +; RV64I-NEXT: .LBB210_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16239,26 +16457,9 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB211_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB211_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB211_3 ; RV32I-NEXT: j .LBB211_4 -; RV32I-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB211_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB211_6 -; 
RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB211_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV32I-NEXT: .LBB211_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16267,8 +16468,23 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB211_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB211_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB211_4 +; RV32I-NEXT: .LBB211_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB211_5 +; RV32I-NEXT: .LBB211_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB211_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB211_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB211_1 +; RV32I-NEXT: .LBB211_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16292,26 +16508,9 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB211_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB211_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB211_3 ; RV32IA-NEXT: j .LBB211_4 -; RV32IA-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB211_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; 
RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB211_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB211_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV32IA-NEXT: .LBB211_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16320,8 +16519,23 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB211_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB211_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB211_4 +; RV32IA-NEXT: .LBB211_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB211_5 +; RV32IA-NEXT: .LBB211_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB211_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB211_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB211_1 +; RV32IA-NEXT: .LBB211_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16342,23 +16556,25 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB211_3 ; RV64I-NEXT: .LBB211_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB211_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB211_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV64I-NEXT: mv a0, s1 
; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB211_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB211_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB211_1 +; RV64I-NEXT: .LBB211_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB211_1 +; RV64I-NEXT: .LBB211_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16390,26 +16606,9 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB212_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB212_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB212_3 ; RV32I-NEXT: j .LBB212_4 -; RV32I-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB212_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB212_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB212_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV32I-NEXT: .LBB212_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16418,8 +16617,23 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB212_1 -; 
RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB212_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB212_4 +; RV32I-NEXT: .LBB212_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB212_5 +; RV32I-NEXT: .LBB212_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB212_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB212_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB212_1 +; RV32I-NEXT: .LBB212_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16443,26 +16657,9 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB212_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB212_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB212_3 ; RV32IA-NEXT: j .LBB212_4 -; RV32IA-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB212_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB212_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB212_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV32IA-NEXT: .LBB212_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16471,8 +16668,23 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) 
; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB212_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB212_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB212_4 +; RV32IA-NEXT: .LBB212_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB212_5 +; RV32IA-NEXT: .LBB212_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB212_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB212_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB212_1 +; RV32IA-NEXT: .LBB212_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16493,23 +16705,25 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB212_3 ; RV64I-NEXT: .LBB212_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB212_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB212_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB212_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB212_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB212_1 +; RV64I-NEXT: .LBB212_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB212_1 +; RV64I-NEXT: .LBB212_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; 
RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16541,26 +16755,9 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB213_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB213_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB213_3 ; RV32I-NEXT: j .LBB213_4 -; RV32I-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB213_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB213_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB213_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV32I-NEXT: .LBB213_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16569,8 +16766,23 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB213_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB213_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB213_4 +; RV32I-NEXT: .LBB213_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB213_5 +; RV32I-NEXT: .LBB213_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB213_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB213_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB213_1 +; RV32I-NEXT: .LBB213_7: # 
%atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16594,26 +16806,9 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB213_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB213_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB213_3 ; RV32IA-NEXT: j .LBB213_4 -; RV32IA-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB213_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB213_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB213_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV32IA-NEXT: .LBB213_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16622,8 +16817,23 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB213_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB213_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB213_4 +; RV32IA-NEXT: .LBB213_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB213_5 +; RV32IA-NEXT: .LBB213_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB213_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB213_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, 
s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB213_1 +; RV32IA-NEXT: .LBB213_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16644,23 +16854,25 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB213_3 ; RV64I-NEXT: .LBB213_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB213_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB213_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB213_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB213_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB213_1 +; RV64I-NEXT: .LBB213_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB213_1 +; RV64I-NEXT: .LBB213_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16692,26 +16904,9 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB214_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB214_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB214_3 ; RV32I-NEXT: j .LBB214_4 -; RV32I-NEXT: .LBB214_3: # in Loop: 
Header=BB214_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB214_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB214_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB214_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV32I-NEXT: .LBB214_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16720,8 +16915,23 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB214_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB214_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB214_4 +; RV32I-NEXT: .LBB214_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB214_5 +; RV32I-NEXT: .LBB214_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB214_5: # %atomicrmw.start +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB214_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB214_1 +; RV32I-NEXT: .LBB214_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16745,26 +16955,9 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB214_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB214_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB214_3 ; RV32IA-NEXT: j 
.LBB214_4 -; RV32IA-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB214_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB214_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB214_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV32IA-NEXT: .LBB214_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16773,8 +16966,23 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB214_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB214_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB214_4 +; RV32IA-NEXT: .LBB214_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB214_5 +; RV32IA-NEXT: .LBB214_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB214_5: # %atomicrmw.start +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB214_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB214_1 +; RV32IA-NEXT: .LBB214_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16795,23 +17003,25 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB214_3 ; RV64I-NEXT: .LBB214_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bltu s0, a2, .LBB214_3 -; 
RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB214_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi a4, zero, 5 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB214_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB214_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB214_1 +; RV64I-NEXT: .LBB214_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB214_1 +; RV64I-NEXT: .LBB214_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16843,27 +17053,9 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB215_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB215_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB215_3 ; RV32I-NEXT: j .LBB215_4 -; RV32I-NEXT: .LBB215_3: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB215_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB215_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB215_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32I-NEXT: .LBB215_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv 
a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16872,8 +17064,24 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB215_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB215_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB215_4 +; RV32I-NEXT: .LBB215_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB215_5 +; RV32I-NEXT: .LBB215_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB215_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB215_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB215_1 +; RV32I-NEXT: .LBB215_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16897,27 +17105,9 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB215_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB215_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB215_3 ; RV32IA-NEXT: j .LBB215_4 -; RV32IA-NEXT: .LBB215_3: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB215_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB215_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB215_6: # %atomicrmw.start -; RV32IA-NEXT: 
# in Loop: Header=BB215_1 Depth=1 +; RV32IA-NEXT: .LBB215_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16926,8 +17116,24 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB215_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB215_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB215_4 +; RV32IA-NEXT: .LBB215_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB215_5 +; RV32IA-NEXT: .LBB215_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB215_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB215_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB215_1 +; RV32IA-NEXT: .LBB215_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16948,23 +17154,25 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB215_3 ; RV64I-NEXT: .LBB215_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB215_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB215_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: mv a3, zero ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB215_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; 
RV64I-NEXT: bnez a0, .LBB215_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB215_1 +; RV64I-NEXT: .LBB215_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB215_1 +; RV64I-NEXT: .LBB215_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -16996,27 +17204,9 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB216_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB216_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB216_3 ; RV32I-NEXT: j .LBB216_4 -; RV32I-NEXT: .LBB216_3: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB216_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB216_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB216_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32I-NEXT: .LBB216_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17025,8 +17215,24 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB216_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB216_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB216_4 +; RV32I-NEXT: .LBB216_3: # %atomicrmw.start +; RV32I-NEXT: 
sltu a0, s0, a1 +; RV32I-NEXT: j .LBB216_5 +; RV32I-NEXT: .LBB216_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB216_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB216_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB216_1 +; RV32I-NEXT: .LBB216_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17050,27 +17256,9 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB216_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB216_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB216_3 ; RV32IA-NEXT: j .LBB216_4 -; RV32IA-NEXT: .LBB216_3: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB216_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB216_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB216_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32IA-NEXT: .LBB216_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17079,8 +17267,24 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB216_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB216_7 +; 
RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB216_4 +; RV32IA-NEXT: .LBB216_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB216_5 +; RV32IA-NEXT: .LBB216_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB216_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB216_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB216_1 +; RV32IA-NEXT: .LBB216_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17101,23 +17305,25 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB216_3 ; RV64I-NEXT: .LBB216_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB216_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB216_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 2 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB216_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB216_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB216_1 +; RV64I-NEXT: .LBB216_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB216_1 +; RV64I-NEXT: .LBB216_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -17149,27 +17355,9 @@ define i64 
@atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB217_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB217_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB217_3 ; RV32I-NEXT: j .LBB217_4 -; RV32I-NEXT: .LBB217_3: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB217_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB217_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB217_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32I-NEXT: .LBB217_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17178,8 +17366,24 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB217_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB217_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB217_4 +; RV32I-NEXT: .LBB217_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB217_5 +; RV32I-NEXT: .LBB217_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB217_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB217_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB217_1 +; RV32I-NEXT: .LBB217_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, 
a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17203,27 +17407,9 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB217_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB217_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB217_3 ; RV32IA-NEXT: j .LBB217_4 -; RV32IA-NEXT: .LBB217_3: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB217_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB217_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB217_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32IA-NEXT: .LBB217_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17232,8 +17418,24 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB217_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB217_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB217_4 +; RV32IA-NEXT: .LBB217_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB217_5 +; RV32IA-NEXT: .LBB217_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB217_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB217_1 +; RV32IA-NEXT: # %bb.6: # 
%atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB217_1 +; RV32IA-NEXT: .LBB217_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17254,23 +17456,25 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB217_3 ; RV64I-NEXT: .LBB217_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB217_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB217_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 3 ; RV64I-NEXT: mv a4, zero ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB217_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB217_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB217_1 +; RV64I-NEXT: .LBB217_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB217_1 +; RV64I-NEXT: .LBB217_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -17302,27 +17506,9 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB218_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB218_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB218_3 ; RV32I-NEXT: j .LBB218_4 -; 
RV32I-NEXT: .LBB218_3: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB218_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB218_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB218_6: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32I-NEXT: .LBB218_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17331,8 +17517,24 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB218_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB218_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB218_4 +; RV32I-NEXT: .LBB218_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB218_5 +; RV32I-NEXT: .LBB218_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB218_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB218_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB218_1 +; RV32I-NEXT: .LBB218_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17356,27 +17558,9 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB218_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB218_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 
Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB218_3 ; RV32IA-NEXT: j .LBB218_4 -; RV32IA-NEXT: .LBB218_3: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB218_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB218_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB218_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32IA-NEXT: .LBB218_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17385,8 +17569,24 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB218_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB218_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB218_4 +; RV32IA-NEXT: .LBB218_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB218_5 +; RV32IA-NEXT: .LBB218_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB218_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB218_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB218_1 +; RV32IA-NEXT: .LBB218_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17407,23 +17607,25 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB218_3 ; 
RV64I-NEXT: .LBB218_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB218_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB218_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 4 ; RV64I-NEXT: addi a4, zero, 2 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB218_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB218_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB218_1 +; RV64I-NEXT: .LBB218_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB218_1 +; RV64I-NEXT: .LBB218_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) @@ -17455,27 +17657,9 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: lw a1, 4(a0) ; RV32I-NEXT: lw a2, 0(a0) ; RV32I-NEXT: mv s3, sp -; RV32I-NEXT: .LBB219_1: # %atomicrmw.start -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a1, s0, .LBB219_3 -; RV32I-NEXT: # %bb.2: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: bne a1, s0, .LBB219_3 ; RV32I-NEXT: j .LBB219_4 -; RV32I-NEXT: .LBB219_3: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: .LBB219_4: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB219_6 -; RV32I-NEXT: # %bb.5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB219_6: 
# %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32I-NEXT: .LBB219_1: # %atomicrmw.start ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17484,8 +17668,24 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32I-NEXT: call __atomic_compare_exchange_8 ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB219_1 -; RV32I-NEXT: # %bb.7: # %atomicrmw.end +; RV32I-NEXT: bnez a0, .LBB219_7 +; RV32I-NEXT: # %bb.2: # %atomicrmw.start +; RV32I-NEXT: beq a1, s0, .LBB219_4 +; RV32I-NEXT: .LBB219_3: # %atomicrmw.start +; RV32I-NEXT: sltu a0, s0, a1 +; RV32I-NEXT: j .LBB219_5 +; RV32I-NEXT: .LBB219_4: +; RV32I-NEXT: sltu a0, s2, a2 +; RV32I-NEXT: .LBB219_5: # %atomicrmw.start +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a0, .LBB219_1 +; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a3, s0 +; RV32I-NEXT: j .LBB219_1 +; RV32I-NEXT: .LBB219_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17509,27 +17709,9 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: lw a1, 4(a0) ; RV32IA-NEXT: lw a2, 0(a0) ; RV32IA-NEXT: mv s3, sp -; RV32IA-NEXT: .LBB219_1: # %atomicrmw.start -; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a1, s0, .LBB219_3 -; RV32IA-NEXT: # %bb.2: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: bne a1, s0, .LBB219_3 ; RV32IA-NEXT: j .LBB219_4 -; RV32IA-NEXT: .LBB219_3: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: .LBB219_4: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB219_6 -; RV32IA-NEXT: # %bb.5: # %atomicrmw.start -; 
RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB219_6: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32IA-NEXT: .LBB219_1: # %atomicrmw.start ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17538,8 +17720,24 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV32IA-NEXT: call __atomic_compare_exchange_8 ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB219_1 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.end +; RV32IA-NEXT: bnez a0, .LBB219_7 +; RV32IA-NEXT: # %bb.2: # %atomicrmw.start +; RV32IA-NEXT: beq a1, s0, .LBB219_4 +; RV32IA-NEXT: .LBB219_3: # %atomicrmw.start +; RV32IA-NEXT: sltu a0, s0, a1 +; RV32IA-NEXT: j .LBB219_5 +; RV32IA-NEXT: .LBB219_4: +; RV32IA-NEXT: sltu a0, s2, a2 +; RV32IA-NEXT: .LBB219_5: # %atomicrmw.start +; RV32IA-NEXT: xori a0, a0, 1 +; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a0, .LBB219_1 +; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a3, s0 +; RV32IA-NEXT: j .LBB219_1 +; RV32IA-NEXT: .LBB219_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17560,23 +17758,25 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind { ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: ld a2, 0(a0) ; RV64I-NEXT: addi s2, sp, 8 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bltu s0, a2, .LBB219_3 ; RV64I-NEXT: .LBB219_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sd a2, 8(sp) -; RV64I-NEXT: bgeu s0, a2, .LBB219_3 -; RV64I-NEXT: # %bb.2: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: .LBB219_3: # %atomicrmw.start -; RV64I-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: addi a3, zero, 5 ; RV64I-NEXT: addi 
a4, zero, 5 ; RV64I-NEXT: call __atomic_compare_exchange_8 ; RV64I-NEXT: ld a2, 8(sp) -; RV64I-NEXT: beqz a0, .LBB219_1 -; RV64I-NEXT: # %bb.4: # %atomicrmw.end +; RV64I-NEXT: bnez a0, .LBB219_4 +; RV64I-NEXT: # %bb.2: # %atomicrmw.start +; RV64I-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV64I-NEXT: sd a2, 8(sp) +; RV64I-NEXT: bgeu s0, a2, .LBB219_1 +; RV64I-NEXT: .LBB219_3: # %atomicrmw.start +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: j .LBB219_1 +; RV64I-NEXT: .LBB219_4: # %atomicrmw.end ; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ld s2, 16(sp) ; RV64I-NEXT: ld s1, 24(sp) diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll index 2e9ad7a3f16..41cb90ba3e8 100644 --- a/llvm/test/CodeGen/RISCV/remat.ll +++ b/llvm/test/CodeGen/RISCV/remat.ll @@ -52,24 +52,32 @@ define i32 @test() nounwind { ; RV32I-NEXT: lui s0, %hi(d) ; RV32I-NEXT: lui s10, %hi(c) ; RV32I-NEXT: lui s11, %hi(b) -; RV32I-NEXT: .LBB0_2: # %for.body -; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: lw a1, %lo(l)(s2) -; RV32I-NEXT: beqz a1, .LBB0_4 -; RV32I-NEXT: # %bb.3: # %if.then -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: bnez a1, .LBB0_4 +; RV32I-NEXT: j .LBB0_5 +; RV32I-NEXT: .LBB0_2: # %for.inc +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 +; RV32I-NEXT: lw a0, %lo(a)(s9) +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: sw a0, %lo(a)(s9) +; RV32I-NEXT: beqz a0, .LBB0_11 +; RV32I-NEXT: # %bb.3: # %for.body +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 +; RV32I-NEXT: lw a1, %lo(l)(s2) +; RV32I-NEXT: beqz a1, .LBB0_5 +; RV32I-NEXT: .LBB0_4: # %if.then ; RV32I-NEXT: lw a4, %lo(e)(s1) ; RV32I-NEXT: lw a3, %lo(d)(s0) ; RV32I-NEXT: lw a2, %lo(c)(s10) ; RV32I-NEXT: lw a1, %lo(b)(s11) ; RV32I-NEXT: addi a5, zero, 32 ; RV32I-NEXT: call foo -; RV32I-NEXT: .LBB0_4: # %if.end -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: .LBB0_5: # %if.end +; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: lw a0, %lo(k)(s3) -; RV32I-NEXT: beqz 
a0, .LBB0_6 -; RV32I-NEXT: # %bb.5: # %if.then3 -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: beqz a0, .LBB0_7 +; RV32I-NEXT: # %bb.6: # %if.then3 +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 ; RV32I-NEXT: lw a4, %lo(f)(s8) ; RV32I-NEXT: lw a3, %lo(e)(s1) ; RV32I-NEXT: lw a2, %lo(d)(s0) @@ -77,12 +85,12 @@ define i32 @test() nounwind { ; RV32I-NEXT: lw a0, %lo(b)(s11) ; RV32I-NEXT: addi a5, zero, 64 ; RV32I-NEXT: call foo -; RV32I-NEXT: .LBB0_6: # %if.end5 -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: .LBB0_7: # %if.end5 +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 ; RV32I-NEXT: lw a0, %lo(j)(s4) -; RV32I-NEXT: beqz a0, .LBB0_8 -; RV32I-NEXT: # %bb.7: # %if.then7 -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: beqz a0, .LBB0_9 +; RV32I-NEXT: # %bb.8: # %if.then7 +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 ; RV32I-NEXT: lw a4, %lo(g)(s7) ; RV32I-NEXT: lw a3, %lo(f)(s8) ; RV32I-NEXT: lw a2, %lo(e)(s1) @@ -90,12 +98,12 @@ define i32 @test() nounwind { ; RV32I-NEXT: lw a0, %lo(c)(s10) ; RV32I-NEXT: addi a5, zero, 32 ; RV32I-NEXT: call foo -; RV32I-NEXT: .LBB0_8: # %if.end9 -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: .LBB0_9: # %if.end9 +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 ; RV32I-NEXT: lw a0, %lo(i)(s6) -; RV32I-NEXT: beqz a0, .LBB0_10 -; RV32I-NEXT: # %bb.9: # %if.then11 -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 +; RV32I-NEXT: beqz a0, .LBB0_2 +; RV32I-NEXT: # %bb.10: # %if.then11 +; RV32I-NEXT: # in Loop: Header=BB0_5 Depth=1 ; RV32I-NEXT: lw a4, %lo(h)(s5) ; RV32I-NEXT: lw a3, %lo(g)(s7) ; RV32I-NEXT: lw a2, %lo(f)(s8) @@ -103,12 +111,7 @@ define i32 @test() nounwind { ; RV32I-NEXT: lw a0, %lo(d)(s0) ; RV32I-NEXT: addi a5, zero, 32 ; RV32I-NEXT: call foo -; RV32I-NEXT: .LBB0_10: # %for.inc -; RV32I-NEXT: # in Loop: Header=BB0_2 Depth=1 -; RV32I-NEXT: lw a0, %lo(a)(s9) -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: sw a0, %lo(a)(s9) -; RV32I-NEXT: bnez a0, .LBB0_2 +; RV32I-NEXT: j 
.LBB0_2 ; RV32I-NEXT: .LBB0_11: # %for.end ; RV32I-NEXT: addi a0, zero, 1 ; RV32I-NEXT: lw s11, 12(sp) diff --git a/llvm/test/CodeGen/Thumb/consthoist-physical-addr.ll b/llvm/test/CodeGen/Thumb/consthoist-physical-addr.ll index 83dc7939968..fbea199d2d1 100644 --- a/llvm/test/CodeGen/Thumb/consthoist-physical-addr.ll +++ b/llvm/test/CodeGen/Thumb/consthoist-physical-addr.ll @@ -10,8 +10,9 @@ define i32 @C(i32 %x, i32* nocapture %y) #0 { ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: ldr r3, .LCPI0_0 -; CHECK-NEXT: b .LBB0_4 ; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: cmp r2, #128 +; CHECK-NEXT: beq .LBB0_5 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: str r4, [r3, #8] ; CHECK-NEXT: lsls r4, r2, #2 @@ -20,16 +21,15 @@ define i32 @C(i32 %x, i32* nocapture %y) #0 { ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: str r5, [r3, #12] ; CHECK-NEXT: isb sy -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: ldr r5, [r3, #12] ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: bne .LBB0_2 +; CHECK-NEXT: bne .LBB0_3 ; CHECK-NEXT: ldr r5, [r3, #4] ; CHECK-NEXT: str r5, [r1, r4] ; CHECK-NEXT: adds r2, r2, #1 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: cmp r2, #128 -; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_5: ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/Thumb/pr42760.ll b/llvm/test/CodeGen/Thumb/pr42760.ll index 4ba5dd48e77..fc9a18bb334 100644 --- a/llvm/test/CodeGen/Thumb/pr42760.ll +++ b/llvm/test/CodeGen/Thumb/pr42760.ll @@ -6,27 +6,31 @@ define hidden void @test() { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: movs r0, #1 ; CHECK-NEXT: lsls r1, r0, #2 -; CHECK-NEXT: .LBB0_1: @ %switch +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_1: @ %bb2 +; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB0_6 +; CHECK-NEXT: .LBB0_2: @ %switch ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adr r2, .LJTI0_0 ; CHECK-NEXT: ldr r2, [r2, 
r1] ; CHECK-NEXT: mov pc, r2 -; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .LJTI0_0: -; CHECK-NEXT: .long .LBB0_5+1 +; CHECK-NEXT: .long .LBB0_6+1 ; CHECK-NEXT: .long .LBB0_4+1 +; CHECK-NEXT: .long .LBB0_6+1 ; CHECK-NEXT: .long .LBB0_5+1 -; CHECK-NEXT: .long .LBB0_3+1 -; CHECK-NEXT: .LBB0_3: @ %bb -; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: bne .LBB0_5 -; CHECK-NEXT: .LBB0_4: @ %bb2 -; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_4: @ %switch +; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_5: @ %bb +; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq .LBB0_1 -; CHECK-NEXT: .LBB0_5: @ %dead +; CHECK-NEXT: .LBB0_6: @ %dead entry: br label %switch diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index d2d18ec221a..acc4b7e1381 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -82,14 +82,14 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK-LABEL: test_loop_cold_blocks: ; CHECK: %entry -; CHECK-NOT: .p2align -; CHECK: %unlikely1 -; CHECK-NOT: .p2align -; CHECK: %unlikely2 ; CHECK: .p2align ; CHECK: %body1 ; CHECK: %body2 ; CHECK: %body3 +; CHECK-NOT: .p2align +; CHECK: %unlikely1 +; CHECK-NOT: .p2align +; CHECK: %unlikely2 ; CHECK: %exit entry: @@ -125,7 +125,7 @@ exit: ret i32 %sum } -!0 = !{!"branch_weights", i32 4, i32 64} +!0 = !{!"branch_weights", i32 1, i32 64} define i32 @test_loop_early_exits(i32 %i, i32* %a) { ; Check that we sink early exit blocks out of loop bodies. @@ -189,8 +189,8 @@ define i32 @test_loop_rotate(i32 %i, i32* %a) { ; loop, eliminating unconditional branches to the top. 
; CHECK-LABEL: test_loop_rotate: ; CHECK: %entry -; CHECK: %body1 ; CHECK: %body0 +; CHECK: %body1 ; CHECK: %exit entry: @@ -957,16 +957,15 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { ; CHECK: %if.else ; CHECK: %if.end10 ; Second rotated loop top -; CHECK: .p2align -; CHECK: %if.then24 ; CHECK: %while.cond.outer ; Third rotated loop top ; CHECK: .p2align +; CHECK: %if.end20 ; CHECK: %while.cond ; CHECK: %while.body ; CHECK: %land.lhs.true ; CHECK: %if.then19 -; CHECK: %if.end20 +; CHECK: %if.then24 ; CHECK: %if.then8 ; CHECK: ret @@ -1546,8 +1545,8 @@ define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) { ; CHECK-LABEL: not_rotate_if_extra_branch_regression ; CHECK: %.entry ; CHECK: %.first_backedge -; CHECK: %.slow ; CHECK: %.second_header +; CHECK: %.slow .entry: %sum.0 = shl nsw i32 %count, 1 br label %.first_header diff --git a/llvm/test/CodeGen/X86/code_placement.ll b/llvm/test/CodeGen/X86/code_placement.ll index 7b5f4c34690..270612883a9 100644 --- a/llvm/test/CodeGen/X86/code_placement.ll +++ b/llvm/test/CodeGen/X86/code_placement.ll @@ -4,6 +4,11 @@ @Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] @Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] +; CHECK: %entry +; CHECK: %bb +; CHECK: %bb1 +; CHECK: %bb2 + define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp { entry: %0 = load i32, i32* %rk, align 4 ; <i32> [#uses=1] @@ -12,8 +17,6 @@ entry: %tmp15 = add i32 %r, -1 ; <i32> [#uses=1] %tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2] br label %bb -; CHECK: jmp -; CHECK-NEXT: align bb: ; preds = %bb1, %entry %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3] diff --git a/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll b/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll index b30aaea9024..48329ed86e1 100644 --- a/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll +++ 
b/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll @@ -1,13 +1,12 @@ ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s define void @foo() { -; Test that when determining the edge probability from a node in an inner loop -; to a node in an outer loop, the weights on edges in the inner loop should be -; ignored if we are building the chain for the outer loop. +; After moving the latch to the top of loop, there is no fall through from the +; latch to outer loop. ; ; CHECK-LABEL: foo: -; CHECK: callq c ; CHECK: callq b +; CHECK: callq c entry: %call = call zeroext i1 @a() diff --git a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll index 925f880b4fc..cdf2fb05a73 100644 --- a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll +++ b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll @@ -5,13 +5,13 @@ define void @foo() { ; Test a nested loop case when profile data is not available. ; ; CHECK-LABEL: foo: +; CHECK: callq g +; CHECK: callq h ; CHECK: callq b -; CHECK: callq c -; CHECK: callq d ; CHECK: callq e ; CHECK: callq f -; CHECK: callq g -; CHECK: callq h +; CHECK: callq c +; CHECK: callq d entry: br label %header diff --git a/llvm/test/CodeGen/X86/code_placement_no_header_change.ll b/llvm/test/CodeGen/X86/code_placement_no_header_change.ll index 0275606568c..ab173b8de71 100644 --- a/llvm/test/CodeGen/X86/code_placement_no_header_change.ll +++ b/llvm/test/CodeGen/X86/code_placement_no_header_change.ll @@ -7,9 +7,9 @@ define i32 @bar(i32 %count) { ; Later backedge1 and backedge2 is rotated before loop header. 
; CHECK-LABEL: bar ; CHECK: %.entry +; CHECK: %.header ; CHECK: %.backedge1 ; CHECK: %.backedge2 -; CHECK: %.header ; CHECK: %.exit .entry: %c = shl nsw i32 %count, 2 diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll index e734773b7a4..e1a1e7b777d 100644 --- a/llvm/test/CodeGen/X86/conditional-tailcall.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll @@ -258,9 +258,12 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 ; CHECK32-NEXT: xorl %edi, %edi # encoding: [0x31,0xff] ; CHECK32-NEXT: incl %edi # encoding: [0x47] -; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_2: # %for.body +; CHECK32-NEXT: .LBB3_1: # %for.cond +; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2] +; CHECK32-NEXT: je .LBB3_13 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %for.body ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] ; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A] @@ -314,12 +317,9 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: incl %eax # encoding: [0x40] ; CHECK32-NEXT: decl %edx # encoding: [0x4a] -; CHECK32-NEXT: .LBB3_1: # %for.cond -; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2] -; CHECK32-NEXT: jne .LBB3_2 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1 -; CHECK32-NEXT: # %bb.13: +; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_13: ; 
CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] ; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A] @@ -369,56 +369,59 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK64-NEXT: .cfi_adjust_cfa_offset 8 ; CHECK64-NEXT: popq %r8 # encoding: [0x41,0x58] ; CHECK64-NEXT: .cfi_adjust_cfa_offset -8 -; CHECK64-NEXT: jmp .LBB3_11 # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_1: # %for.body -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 -; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] -; CHECK64-NEXT: je .LBB3_9 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_1: # %for.cond +; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] +; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 ; CHECK64-NEXT: # %bb.2: # %for.body -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 -; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01] -; CHECK64-NEXT: je .LBB3_7 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] +; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; CHECK64-NEXT: # %bb.3: # %for.body -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01] +; CHECK64-NEXT: je .LBB3_8 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.4: # %for.body +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; 
CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] -; CHECK64-NEXT: jne .LBB3_10 # encoding: [0x75,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.4: # %sw.bb -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: jne .LBB3_11 # encoding: [0x75,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.5: # %sw.bb +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17] ; CHECK64-NEXT: cmpl $43, %edx # encoding: [0x83,0xfa,0x2b] ; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] -; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.5: # %sw.bb -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.6: # %sw.bb +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d] ; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] -; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.6: # %if.else -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.7: # %if.else +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: addl $-48, %edx # encoding: [0x83,0xc2,0xd0] ; CHECK64-NEXT: cmpl $10, %edx # encoding: [0x83,0xfa,0x0a] -; CHECK64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_7: # %sw.bb14 -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: jmp .LBB3_9 # 
encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_8: # %sw.bb14 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f] ; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] ; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] -; CHECK64-NEXT: .LBB3_8: # %if.else -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: .LBB3_9: # %if.else +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9] -; CHECK64-NEXT: jb .LBB3_10 # encoding: [0x72,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK64-NEXT: jb .LBB3_11 # encoding: [0x72,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; CHECK64-NEXT: jmp .LBB3_13 # encoding: [0xeb,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_9: # %sw.bb22 -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: .LBB3_10: # %sw.bb22 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f] ; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] ; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] @@ -426,16 +429,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL ; CHECK64-NEXT: # encoding: [0x73,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_10: # %for.inc -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: .LBB3_11: # %for.inc +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: incq %rdi # encoding: [0x48,0xff,0xc7] ; CHECK64-NEXT: decq %rax # encoding: [0x48,0xff,0xc8] -; 
CHECK64-NEXT: .LBB3_11: # %for.cond -; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] -; CHECK64-NEXT: jne .LBB3_1 # encoding: [0x75,A] +; CHECK64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.12: +; CHECK64-NEXT: .LBB3_12: ; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] ; CHECK64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK64-NEXT: # kill: def $al killed $al killed $eax @@ -451,51 +451,54 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; WIN64-NEXT: movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8] ; WIN64-NEXT: leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01] ; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; WIN64-NEXT: jmp .LBB3_10 # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_1: # %for.body -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 -; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] -; WIN64-NEXT: je .LBB3_8 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_1: # %for.cond +; WIN64-NEXT: # =>This Inner Loop Header: Depth=1 +; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0] +; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; WIN64-NEXT: # %bb.2: # %for.body -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 -; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01] -; WIN64-NEXT: je .LBB3_6 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_6-1, kind: FK_PCRel_1 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] +; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 ; WIN64-NEXT: # 
%bb.3: # %for.body -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01] +; WIN64-NEXT: je .LBB3_7 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.4: # %for.body +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] -; WIN64-NEXT: jne .LBB3_9 # encoding: [0x75,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.4: # %sw.bb -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: jne .LBB3_10 # encoding: [0x75,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.5: # %sw.bb +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] ; WIN64-NEXT: cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b] ; WIN64-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00] -; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.5: # %sw.bb -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.6: # %sw.bb +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d] -; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 -; WIN64-NEXT: jmp .LBB3_7 # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_6: # %sw.bb14 -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] +; 
WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_7: # %sw.bb14 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] -; WIN64-NEXT: .LBB3_7: # %if.else -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: .LBB3_8: # %if.else +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0] ; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] ; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a] -; WIN64-NEXT: jb .LBB3_9 # encoding: [0x72,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; WIN64-NEXT: jb .LBB3_10 # encoding: [0x72,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; WIN64-NEXT: jmp .LBB3_12 # encoding: [0xeb,A] ; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_8: # %sw.bb22 -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: .LBB3_9: # %sw.bb22 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] ; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0] ; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] @@ -503,16 +506,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; WIN64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL ; WIN64-NEXT: # encoding: [0x73,A] ; WIN64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_9: # %for.inc -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: .LBB3_10: # %for.inc +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: incq %rcx # encoding: [0x48,0xff,0xc1] ; WIN64-NEXT: decq %r8 # encoding: [0x49,0xff,0xc8] -; WIN64-NEXT: .LBB3_10: # %for.cond -; WIN64-NEXT: # 
=>This Inner Loop Header: Depth=1 -; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0] -; WIN64-NEXT: jne .LBB3_1 # encoding: [0x75,A] +; WIN64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] ; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.11: +; WIN64-NEXT: .LBB3_11: ; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] ; WIN64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; WIN64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/loop-blocks.ll b/llvm/test/CodeGen/X86/loop-blocks.ll index f39c8a8eab9..a5e806d936b 100644 --- a/llvm/test/CodeGen/X86/loop-blocks.ll +++ b/llvm/test/CodeGen/X86/loop-blocks.ll @@ -7,12 +7,14 @@ ; order to avoid a branch within the loop. ; CHECK-LABEL: simple: -; CHECK: jmp .LBB0_1 -; CHECK-NEXT: align -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: callq loop_latch +; CHECK: align ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: callq loop_header +; CHECK: js .LBB0_3 +; CHECK-NEXT: callq loop_latch +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: callq exit define void @simple() nounwind { entry: @@ -75,17 +77,21 @@ exit: ; CHECK-LABEL: yet_more_involved: ; CHECK: jmp .LBB2_1 ; CHECK-NEXT: align -; CHECK-NEXT: .LBB2_5: -; CHECK-NEXT: callq block_a_true_func -; CHECK-NEXT: callq block_a_merge_func -; CHECK-NEXT: .LBB2_1: + +; CHECK: .LBB2_1: ; CHECK-NEXT: callq body -; -; LBB2_4 -; CHECK: callq bar99 +; CHECK-NEXT: callq get +; CHECK-NEXT: cmpl $2, %eax +; CHECK-NEXT: jge .LBB2_2 +; CHECK-NEXT: callq bar99 ; CHECK-NEXT: callq get ; CHECK-NEXT: cmpl $2999, %eax -; CHECK-NEXT: jle .LBB2_5 +; CHECK-NEXT: jg .LBB2_6 +; CHECK-NEXT: callq block_a_true_func +; CHECK-NEXT: callq block_a_merge_func +; CHECK-NEXT: jmp .LBB2_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB2_6: ; CHECK-NEXT: callq block_a_false_func ; CHECK-NEXT: callq block_a_merge_func ; CHECK-NEXT: jmp .LBB2_1 @@ -201,12 +207,12 @@ block102: } ; CHECK-LABEL: check_minsize: -; CHECK: jmp .LBB4_1 ; CHECK-NOT: align -; 
CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: callq loop_latch -; CHECK-NEXT: .LBB4_1: +; CHECK: .LBB4_1: ; CHECK-NEXT: callq loop_header +; CHECK: callq loop_latch +; CHECK: .LBB4_3: +; CHECK: callq exit define void @check_minsize() minsize nounwind { diff --git a/llvm/test/CodeGen/X86/loop-rotate.ll b/llvm/test/CodeGen/X86/loop-rotate.ll new file mode 100644 index 00000000000..3f0a390e7c1 --- /dev/null +++ b/llvm/test/CodeGen/X86/loop-rotate.ll @@ -0,0 +1,120 @@ +; RUN: llc -mtriple=i686-linux < %s | FileCheck %s + +; Don't rotate the loop if the number of fall through to exit is not larger +; than the number of fall through to header. +define void @no_rotate() { +; CHECK-LABEL: no_rotate +; CHECK: %entry +; CHECK: %header +; CHECK: %middle +; CHECK: %latch1 +; CHECK: %latch2 +; CHECK: %end +entry: + br label %header + +header: + %val1 = call i1 @foo() + br i1 %val1, label %middle, label %end + +middle: + %val2 = call i1 @foo() + br i1 %val2, label %latch1, label %end + +latch1: + %val3 = call i1 @foo() + br i1 %val3, label %latch2, label %header + +latch2: + %val4 = call i1 @foo() + br label %header + +end: + ret void +} + +define void @do_rotate() { +; CHECK-LABEL: do_rotate +; CHECK: %entry +; CHECK: %then +; CHECK: %else +; CHECK: %latch1 +; CHECK: %latch2 +; CHECK: %header +; CHECK: %end +entry: + %val0 = call i1 @foo() + br i1 %val0, label %then, label %else + +then: + call void @a() + br label %header + +else: + call void @b() + br label %header + +header: + %val1 = call i1 @foo() + br i1 %val1, label %latch1, label %end + +latch1: + %val3 = call i1 @foo() + br i1 %val3, label %latch2, label %header + +latch2: + %val4 = call i1 @foo() + br label %header + +end: + ret void +} + +; The loop structure is same as in @no_rotate, but the loop header's predecessor +; doesn't fall through to it, so it should be rotated to get exit fall through. 
+define void @do_rotate2() { +; CHECK-LABEL: do_rotate2 +; CHECK: %entry +; CHECK: %then +; CHECK: %middle +; CHECK: %latch1 +; CHECK: %latch2 +; CHECK: %header +; CHECK: %exit +entry: + %val0 = call i1 @foo() + br i1 %val0, label %then, label %header, !prof !1 + +then: + call void @a() + br label %end + +header: + %val1 = call i1 @foo() + br i1 %val1, label %middle, label %exit + +middle: + %val2 = call i1 @foo() + br i1 %val2, label %latch1, label %exit + +latch1: + %val3 = call i1 @foo() + br i1 %val3, label %latch2, label %header + +latch2: + %val4 = call i1 @foo() + br label %header + +exit: + call void @b() + br label %end + +end: + ret void +} + +declare i1 @foo() +declare void @a() +declare void @b() + +!1 = !{!"branch_weights", i32 10, i32 1} diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index d3e758e7c74..d551ed9a093 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -21,22 +21,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8 ; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10 ; GENERIC-NEXT: movq %rcx, %r11 -; GENERIC-NEXT: jmp LBB0_1 ; GENERIC-NEXT: .p2align 4, 0x90 -; GENERIC-NEXT: LBB0_2: ## %bb1 -; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; GENERIC-NEXT: movl %edi, %ebx -; GENERIC-NEXT: shrl $16, %ebx -; GENERIC-NEXT: movzbl %bl, %ebx -; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax -; GENERIC-NEXT: xorl -4(%r14), %eax -; GENERIC-NEXT: shrl $24, %edi -; GENERIC-NEXT: movzbl %bpl, %ebx -; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx -; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx -; GENERIC-NEXT: xorl (%r14), %ebx -; GENERIC-NEXT: decq %r11 -; GENERIC-NEXT: addq $16, %r14 ; GENERIC-NEXT: LBB0_1: ## %bb ; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1 ; GENERIC-NEXT: movzbl %al, %edi @@ -56,8 +41,23 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, 
i32 %r ; GENERIC-NEXT: shrl $24, %eax ; GENERIC-NEXT: movl (%r9,%rax,4), %eax ; GENERIC-NEXT: testq %r11, %r11 -; GENERIC-NEXT: jne LBB0_2 -; GENERIC-NEXT: ## %bb.3: ## %bb2 +; GENERIC-NEXT: je LBB0_3 +; GENERIC-NEXT: ## %bb.2: ## %bb1 +; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; GENERIC-NEXT: movl %edi, %ebx +; GENERIC-NEXT: shrl $16, %ebx +; GENERIC-NEXT: movzbl %bl, %ebx +; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax +; GENERIC-NEXT: xorl -4(%r14), %eax +; GENERIC-NEXT: shrl $24, %edi +; GENERIC-NEXT: movzbl %bpl, %ebx +; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx +; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx +; GENERIC-NEXT: xorl (%r14), %ebx +; GENERIC-NEXT: decq %r11 +; GENERIC-NEXT: addq $16, %r14 +; GENERIC-NEXT: jmp LBB0_1 +; GENERIC-NEXT: LBB0_3: ## %bb2 ; GENERIC-NEXT: shlq $4, %rcx ; GENERIC-NEXT: andl $-16777216, %eax ## imm = 0xFF000000 ; GENERIC-NEXT: movl %edi, %ebx @@ -105,21 +105,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10 ; ATOM-NEXT: decl %ecx ; ATOM-NEXT: movq %rcx, %r11 -; ATOM-NEXT: jmp LBB0_1 ; ATOM-NEXT: .p2align 4, 0x90 -; ATOM-NEXT: LBB0_2: ## %bb1 -; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; ATOM-NEXT: shrl $16, %eax -; ATOM-NEXT: shrl $24, %edi -; ATOM-NEXT: decq %r11 -; ATOM-NEXT: movzbl %al, %ebp -; ATOM-NEXT: movzbl %bl, %eax -; ATOM-NEXT: movl (%r10,%rax,4), %eax -; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d -; ATOM-NEXT: xorl (%r9,%rdi,4), %eax -; ATOM-NEXT: xorl -4(%r14), %r15d -; ATOM-NEXT: xorl (%r14), %eax -; ATOM-NEXT: addq $16, %r14 ; ATOM-NEXT: LBB0_1: ## %bb ; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1 ; ATOM-NEXT: movl %eax, %edi @@ -140,8 +126,22 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; ATOM-NEXT: movl (%r9,%rax,4), %r15d ; ATOM-NEXT: testq %r11, %r11 ; ATOM-NEXT: movl %edi, %eax -; ATOM-NEXT: jne LBB0_2 -; ATOM-NEXT: ## %bb.3: ## %bb2 +; ATOM-NEXT: je LBB0_3 +; ATOM-NEXT: ## %bb.2: ## %bb1 +; 
ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; ATOM-NEXT: shrl $16, %eax +; ATOM-NEXT: shrl $24, %edi +; ATOM-NEXT: decq %r11 +; ATOM-NEXT: movzbl %al, %ebp +; ATOM-NEXT: movzbl %bl, %eax +; ATOM-NEXT: movl (%r10,%rax,4), %eax +; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d +; ATOM-NEXT: xorl (%r9,%rdi,4), %eax +; ATOM-NEXT: xorl -4(%r14), %r15d +; ATOM-NEXT: xorl (%r14), %eax +; ATOM-NEXT: addq $16, %r14 +; ATOM-NEXT: jmp LBB0_1 +; ATOM-NEXT: LBB0_3: ## %bb2 ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $8, %edi ; ATOM-NEXT: movzbl %bl, %ebp diff --git a/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll b/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll index 718ec1efa3b..d86ec9c8129 100644 --- a/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll +++ b/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll @@ -1,11 +1,11 @@ -; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux --force-precise-rotation-cost < %s | FileCheck %s +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s ; The block latch should be moved before header. 
;CHECK-LABEL: test1: ;CHECK: %latch ;CHECK: %header ;CHECK: %false -define i32 @test1(i32* %p) !prof !0 { +define i32 @test1(i32* %p) { entry: br label %header @@ -39,7 +39,7 @@ exit: ;CHECK: %latch ;CHECK: %header ;CHECK: %false -define i32 @test2(i32* %p) !prof !0 { +define i32 @test2(i32* %p) { entry: br label %header @@ -107,7 +107,7 @@ exit: ;CHECK: %latch ;CHECK: %header ;CHECK: %false -define i32 @test3(i32* %p) !prof !0 { +define i32 @test3(i32* %p) { entry: br label %header @@ -173,9 +173,9 @@ exit: ;CHECK: %header ;CHECK: %true ;CHECK: %latch -;CHECK: %exit ;CHECK: %false -define i32 @test4(i32 %t, i32* %p) !prof !0 { +;CHECK: %exit +define i32 @test4(i32 %t, i32* %p) { entry: br label %header @@ -207,7 +207,6 @@ exit: ret i32 %count4 } -!0 = !{!"function_entry_count", i32 1000} !1 = !{!"branch_weights", i32 100, i32 1} !2 = !{!"branch_weights", i32 16, i32 16} !3 = !{!"branch_weights", i32 51, i32 49} @@ -217,7 +216,7 @@ exit: ;CHECK: %entry ;CHECK: %header ;CHECK: %latch -define void @test5(i32* %p) !prof !0 { +define void @test5(i32* %p) { entry: br label %header @@ -237,3 +236,4 @@ latch: exit: ret void } + diff --git a/llvm/test/CodeGen/X86/pr38185.ll b/llvm/test/CodeGen/X86/pr38185.ll index 778fb55b419..3a917f924c1 100644 --- a/llvm/test/CodeGen/X86/pr38185.ll +++ b/llvm/test/CodeGen/X86/pr38185.ll @@ -5,9 +5,13 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %body +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %r9 +; CHECK-NEXT: cmpq %rcx, %r9 +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %body ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $1, (%rdx,%r9,4) ; CHECK-NEXT: movzbl (%rdi,%r9,4), %r8d @@ -17,12 +21,8 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) 
{ ; CHECK-NEXT: movl %eax, (%rdi,%r9,4) ; CHECK-NEXT: incq %r9 ; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: .LBB0_1: # %loop -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %r9 -; CHECK-NEXT: cmpq %rcx, %r9 -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.3: # %endloop +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %endloop ; CHECK-NEXT: retq %i = alloca i64 store i64 0, i64* %i diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index fd3d83ed2cb..9238ab0bf89 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -103,6 +103,34 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_20: ## %sw.bb256 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: decl %r15d +; CHECK-NEXT: testl %r15d, %r15d +; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: jle LBB0_22 +; CHECK-NEXT: LBB0_13: ## %while.body200 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 +; CHECK-NEXT: leal -268(%r14), %eax +; CHECK-NEXT: cmpl $105, %eax +; CHECK-NEXT: ja LBB0_14 +; CHECK-NEXT: ## %bb.56: ## %while.body200 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movslq (%rdi,%rax,4), %rax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: jmpq *%rax +; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: jne LBB0_21 +; CHECK-NEXT: jmp LBB0_55 +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; 
CHECK-NEXT: leal 1(%r14), %eax @@ -118,12 +146,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $1, %r13d ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: jne LBB0_21 -; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: testb %dl, %dl @@ -137,30 +159,52 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: ## implicit-def: $rax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jns LBB0_30 +; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_29: ## %land.rhs485 -; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 -; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: LBB0_32: ## %do.body479.backedge +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 +; CHECK-NEXT: leaq 1(%r12), %rax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je LBB0_33 +; CHECK-NEXT: ## %bb.29: ## %land.rhs485 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: js LBB0_55 -; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 +; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 ; CHECK-NEXT: movq %rax, %r12 ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_34 -; CHECK-NEXT: LBB0_32: ## 
%do.body479.backedge -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r12), %rax -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_29 -; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit +; CHECK-NEXT: jne LBB0_32 +; CHECK-NEXT: jmp LBB0_34 +; CHECK-NEXT: LBB0_45: ## %sw.bb1134 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: jb LBB0_55 +; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C +; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: LBB0_19: ## %sw.bb243 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl $2, %r13d +; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl $20, %r13d +; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: incq %r12 ; CHECK-NEXT: LBB0_34: ## %if.end517 @@ -199,47 +243,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi ; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_45: ## %sw.bb1134 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: cmpq %rax, %rcx -; CHECK-NEXT: jb LBB0_55 -; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_19: ## %sw.bb243 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r13d -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r13d -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: .p2align 4, 
0x90 -; CHECK-NEXT: LBB0_13: ## %while.body200 -; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 -; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r14), %eax -; CHECK-NEXT: cmpl $105, %eax -; CHECK-NEXT: ja LBB0_14 -; CHECK-NEXT: ## %bb.56: ## %while.body200 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax -; CHECK-NEXT: addq %rdi, %rax -; CHECK-NEXT: jmpq *%rax -; CHECK-NEXT: LBB0_20: ## %sw.bb256 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: decl %r15d -; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r13d, %r14d -; CHECK-NEXT: jg LBB0_13 -; CHECK-NEXT: jmp LBB0_22 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/X86/reverse_branches.ll b/llvm/test/CodeGen/X86/reverse_branches.ll index 9f51a6313cc..fabde167949 100644 --- a/llvm/test/CodeGen/X86/reverse_branches.ll +++ b/llvm/test/CodeGen/X86/reverse_branches.ll @@ -85,25 +85,36 @@ define i32 @test_branches_order() uwtable ssp { ; CHECK-NEXT: jg LBB0_16 ; CHECK-NEXT: LBB0_9: ## %for.cond18.preheader ; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_10 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_11 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_12 Depth 3 ; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 +; CHECK-NEXT: jle LBB0_11 +; CHECK-NEXT: jmp LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_10: ## %for.cond18 +; CHECK-NEXT: LBB0_14: ## %exit +; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2 +; CHECK-NEXT: addq %rsi, %rbp +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: decq %rsi +; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9 +; CHECK-NEXT: cmpq 
$-1000, %rbp ## imm = 0xFC18 +; CHECK-NEXT: jne LBB0_5 +; CHECK-NEXT: ## %bb.10: ## %for.cond18 +; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2 +; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 +; CHECK-NEXT: jg LBB0_15 +; CHECK-NEXT: LBB0_11: ## %for.body20 ; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 ; CHECK-NEXT: ## => This Loop Header: Depth=2 ; CHECK-NEXT: ## Child Loop BB0_12 Depth 3 -; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 -; CHECK-NEXT: jg LBB0_15 -; CHECK-NEXT: ## %bb.11: ## %for.body20 -; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2 ; CHECK-NEXT: movq $-1000, %rbp ## imm = 0xFC18 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_12: ## %do.body.i ; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 -; CHECK-NEXT: ## Parent Loop BB0_10 Depth=2 +; CHECK-NEXT: ## Parent Loop BB0_11 Depth=2 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=3 ; CHECK-NEXT: cmpb $120, 1000(%rdx,%rbp) ; CHECK-NEXT: je LBB0_14 @@ -111,16 +122,6 @@ define i32 @test_branches_order() uwtable ssp { ; CHECK-NEXT: ## in Loop: Header=BB0_12 Depth=3 ; CHECK-NEXT: incq %rbp ; CHECK-NEXT: jne LBB0_12 -; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_14: ## %exit -; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2 -; CHECK-NEXT: addq %rsi, %rbp -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: decq %rsi -; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9 -; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18 -; CHECK-NEXT: je LBB0_10 ; CHECK-NEXT: LBB0_5: ## %if.then ; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: callq _puts diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll index 158243ad972..934581e137f 100644 --- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -215,10 +215,7 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp ; X64-NEXT: movl %esi, %ebp ; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: xorl %ebx, %ebx -; X64-NEXT: 
jmp .LBB2_3 ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB2_6: # in Loop: Header=BB2_3 Depth=1 -; X64-NEXT: cmovgeq %r15, %rax ; X64-NEXT: .LBB2_3: # %l.header ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movslq (%r12), %rcx @@ -237,8 +234,11 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp ; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: incl %ebx ; X64-NEXT: cmpl %ebp, %ebx -; X64-NEXT: jl .LBB2_6 -; X64-NEXT: # %bb.4: +; X64-NEXT: jge .LBB2_4 +; X64-NEXT: # %bb.6: # in Loop: Header=BB2_3 Depth=1 +; X64-NEXT: cmovgeq %r15, %rax +; X64-NEXT: jmp .LBB2_3 +; X64-NEXT: .LBB2_4: ; X64-NEXT: cmovlq %r15, %rax ; X64-NEXT: .LBB2_5: # %exit ; X64-NEXT: shlq $47, %rax @@ -328,20 +328,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: xorl %r13d, %r13d ; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testl %r15d, %r15d -; X64-NEXT: jg .LBB3_5 -; X64-NEXT: jmp .LBB3_4 -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_12: -; X64-NEXT: cmovgeq %rbp, %rax -; X64-NEXT: testl %r15d, %r15d ; X64-NEXT: jle .LBB3_4 +; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_5: # %l2.header.preheader ; X64-NEXT: cmovleq %rbp, %rax ; X64-NEXT: xorl %r15d, %r15d -; X64-NEXT: jmp .LBB3_6 ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_11: # in Loop: Header=BB3_6 Depth=1 -; X64-NEXT: cmovgeq %rbp, %rax ; X64-NEXT: .LBB3_6: # %l2.header ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movslq (%rbx), %rcx @@ -360,8 +352,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r15d ; X64-NEXT: cmpl %r12d, %r15d -; X64-NEXT: jl .LBB3_11 -; X64-NEXT: # %bb.7: +; X64-NEXT: jge .LBB3_7 +; X64-NEXT: # %bb.11: # in Loop: Header=BB3_6 Depth=1 +; X64-NEXT: cmovgeq %rbp, %rax +; X64-NEXT: jmp .LBB3_6 +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB3_7: ; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: movl 
{{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload ; X64-NEXT: jmp .LBB3_8 @@ -385,8 +381,13 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r13d ; X64-NEXT: cmpl %r15d, %r13d -; X64-NEXT: jl .LBB3_12 -; X64-NEXT: # %bb.9: +; X64-NEXT: jge .LBB3_9 +; X64-NEXT: # %bb.12: +; X64-NEXT: cmovgeq %rbp, %rax +; X64-NEXT: testl %r15d, %r15d +; X64-NEXT: jg .LBB3_5 +; X64-NEXT: jmp .LBB3_4 +; X64-NEXT: .LBB3_9: ; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: .LBB3_10: # %exit ; X64-NEXT: shlq $47, %rax @@ -418,7 +419,17 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-LFENCE-NEXT: movl %esi, %r15d ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: xorl %r12d, %r12d +; X64-LFENCE-NEXT: jmp .LBB3_2 ; X64-LFENCE-NEXT: .p2align 4, 0x90 +; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch +; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1 +; X64-LFENCE-NEXT: lfence +; X64-LFENCE-NEXT: movslq (%rbx), %rax +; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi +; X64-LFENCE-NEXT: callq sink +; X64-LFENCE-NEXT: incl %r12d +; X64-LFENCE-NEXT: cmpl %r15d, %r12d +; X64-LFENCE-NEXT: jge .LBB3_6 ; X64-LFENCE-NEXT: .LBB3_2: # %l1.header ; X64-LFENCE-NEXT: # =>This Loop Header: Depth=1 ; X64-LFENCE-NEXT: # Child Loop BB3_4 Depth 2 @@ -440,15 +451,7 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-LFENCE-NEXT: incl %ebp ; X64-LFENCE-NEXT: cmpl %r13d, %ebp ; X64-LFENCE-NEXT: jl .LBB3_4 -; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch -; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1 -; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: movslq (%rbx), %rax -; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi -; X64-LFENCE-NEXT: callq sink -; X64-LFENCE-NEXT: incl %r12d -; X64-LFENCE-NEXT: cmpl %r15d, %r12d -; X64-LFENCE-NEXT: jl .LBB3_2 +; X64-LFENCE-NEXT: jmp .LBB3_5 ; X64-LFENCE-NEXT: .LBB3_6: # %exit ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: addq $8, %rsp diff --git 
a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll index 13f582b6c99..c11ffaba12a 100644 --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -12,14 +12,17 @@ define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: incq %rsi ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: jmp .LBB0_5 +; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %inner_loop_top +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-NEXT: cmpb $0, (%rsi) +; CHECK-NEXT: js .LBB0_3 ; CHECK-NEXT: .LBB0_4: # %inner_loop_latch -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: addq $2, %rsi -; CHECK-NEXT: .LBB0_2: # %inner_loop_top -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpb $0, (%rsi) ; CHECK-NEXT: jns .LBB0_4 ; CHECK-NEXT: jmp .LBB0_3 @@ -130,58 +133,58 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3 ; CHECK-NEXT: testl %ebp, %ebp ; CHECK-NEXT: je .LBB1_18 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_8: # %shared_loop_header +; CHECK-NEXT: .LBB1_9: # %shared_loop_header ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testq %rbx, %rbx ; CHECK-NEXT: jne .LBB1_27 -; CHECK-NEXT: # %bb.9: # %inner_loop_body -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: # %bb.10: # %inner_loop_body +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jns .LBB1_8 -; CHECK-NEXT: # %bb.10: # %if.end96.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jns .LBB1_9 +; CHECK-NEXT: # %bb.11: # %if.end96.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: cmpl 
$3, %ebp ; CHECK-NEXT: jae .LBB1_22 -; CHECK-NEXT: # %bb.11: # %if.end287.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: # %bb.12: # %if.end287.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: cmpl $1, %ebp ; CHECK-NEXT: setne %dl ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_15 -; CHECK-NEXT: # %bb.12: # %if.end308.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jne .LBB1_16 +; CHECK-NEXT: # %bb.13: # %if.end308.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB1_17 -; CHECK-NEXT: # %bb.13: # %if.end335.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: je .LBB1_7 +; CHECK-NEXT: # %bb.14: # %if.end335.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: movl $0, %esi -; CHECK-NEXT: jne .LBB1_7 -; CHECK-NEXT: # %bb.14: # %merge_other -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jne .LBB1_8 +; CHECK-NEXT: # %bb.15: # %merge_other +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: jmp .LBB1_16 -; CHECK-NEXT: .LBB1_15: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jmp .LBB1_17 +; CHECK-NEXT: .LBB1_16: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movb %dl, %sil ; CHECK-NEXT: addl $3, %esi -; CHECK-NEXT: .LBB1_16: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: .LBB1_17: # %outer_loop_latch +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: # implicit-def: $dl -; CHECK-NEXT: jmp .LBB1_7 -; CHECK-NEXT: .LBB1_17: # %merge_predecessor_split -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jmp .LBB1_8 +; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movb $32, %dl ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_7: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; 
CHECK-NEXT: .LBB1_8: # %outer_loop_latch +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movzwl %si, %esi ; CHECK-NEXT: decl %esi ; CHECK-NEXT: movzwl %si, %esi ; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB1_8 +; CHECK-NEXT: jne .LBB1_9 ; CHECK-NEXT: .LBB1_18: # %while.cond.us1412.i ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al diff --git a/llvm/test/CodeGen/X86/tail-dup-repeat.ll b/llvm/test/CodeGen/X86/tail-dup-repeat.ll index 9a1867b8735..bfa1ee61145 100644 --- a/llvm/test/CodeGen/X86/tail-dup-repeat.ll +++ b/llvm/test/CodeGen/X86/tail-dup-repeat.ll @@ -10,35 +10,30 @@ define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 { ; CHECK-LABEL: repeated_tail_dup: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_1: # %for.cond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.2: # %land.lhs.true -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $10, (%rdx) -; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %land.lhs.true +; CHECK-NEXT: movl $10, (%rdx) +; CHECK-NEXT: .LBB0_6: # %dup2 +; CHECK-NEXT: movl $2, (%rcx) +; CHECK-NEXT: testl %r9d, %r9d +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .LBB0_1: # %for.cond +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: .LBB0_3: # %if.end56 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb $1, %sil ; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: # %bb.4: # %if.then64 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movb $1, (%r8) ; CHECK-NEXT: testl %r9d, %r9d ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: jmp .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_5: # %if.end70 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $12, (%rdx) -; CHECK-NEXT: .LBB0_6: # %dup2 -; CHECK-NEXT: # in Loop: Header=BB0_1 
Depth=1 -; CHECK-NEXT: movl $2, (%rcx) -; CHECK-NEXT: testl %r9d, %r9d -; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .LBB0_8: # %for.end ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll index 60eec509637..4fc1f6023b8 100644 --- a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll +++ b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll @@ -115,8 +115,17 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; SSE-NEXT: jne .LBB0_4 ; SSE-NEXT: # %bb.5: # %middle.block ; SSE-NEXT: cmpq %rax, %rdx -; SSE-NEXT: je .LBB0_9 +; SSE-NEXT: jne .LBB0_6 +; SSE-NEXT: .LBB0_9: # %for.cond.cleanup +; SSE-NEXT: retq ; SSE-NEXT: .p2align 4, 0x90 +; SSE-NEXT: .LBB0_8: # %for.body +; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1 +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shll %cl, (%rdi,%rdx,4) +; SSE-NEXT: incq %rdx +; SSE-NEXT: cmpq %rdx, %rax +; SSE-NEXT: je .LBB0_9 ; SSE-NEXT: .LBB0_6: # %for.body ; SSE-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE-NEXT: cmpb $0, (%rsi,%rdx) @@ -125,15 +134,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; SSE-NEXT: # %bb.7: # %for.body ; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1 ; SSE-NEXT: movl %r8d, %ecx -; SSE-NEXT: .LBB0_8: # %for.body -; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1 -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shll %cl, (%rdi,%rdx,4) -; SSE-NEXT: incq %rdx -; SSE-NEXT: cmpq %rdx, %rax -; SSE-NEXT: jne .LBB0_6 -; SSE-NEXT: .LBB0_9: # %for.cond.cleanup -; SSE-NEXT: retq +; SSE-NEXT: jmp .LBB0_8 ; ; AVX1-LABEL: vector_variable_shift_left_loop: ; AVX1: # %bb.0: # %entry @@ -241,8 +242,19 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX1-NEXT: jne .LBB0_4 ; AVX1-NEXT: # %bb.5: # %middle.block ; AVX1-NEXT: cmpq %rax, %rdx -; AVX1-NEXT: je .LBB0_9 +; AVX1-NEXT: jne 
.LBB0_6 +; AVX1-NEXT: .LBB0_9: # %for.cond.cleanup +; AVX1-NEXT: addq $24, %rsp +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq ; AVX1-NEXT: .p2align 4, 0x90 +; AVX1-NEXT: .LBB0_8: # %for.body +; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1 +; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX1-NEXT: shll %cl, (%rdi,%rdx,4) +; AVX1-NEXT: incq %rdx +; AVX1-NEXT: cmpq %rdx, %rax +; AVX1-NEXT: je .LBB0_9 ; AVX1-NEXT: .LBB0_6: # %for.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX1-NEXT: cmpb $0, (%rsi,%rdx) @@ -251,17 +263,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX1-NEXT: # %bb.7: # %for.body ; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1 ; AVX1-NEXT: movl %r8d, %ecx -; AVX1-NEXT: .LBB0_8: # %for.body -; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1 -; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx -; AVX1-NEXT: shll %cl, (%rdi,%rdx,4) -; AVX1-NEXT: incq %rdx -; AVX1-NEXT: cmpq %rdx, %rax -; AVX1-NEXT: jne .LBB0_6 -; AVX1-NEXT: .LBB0_9: # %for.cond.cleanup -; AVX1-NEXT: addq $24, %rsp -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq +; AVX1-NEXT: jmp .LBB0_8 ; ; AVX2-LABEL: vector_variable_shift_left_loop: ; AVX2: # %bb.0: # %entry @@ -316,8 +318,18 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX2-NEXT: jne .LBB0_4 ; AVX2-NEXT: # %bb.5: # %middle.block ; AVX2-NEXT: cmpq %rax, %rdx -; AVX2-NEXT: je .LBB0_9 +; AVX2-NEXT: jne .LBB0_6 +; AVX2-NEXT: .LBB0_9: # %for.cond.cleanup +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; AVX2-NEXT: .p2align 4, 0x90 +; AVX2-NEXT: .LBB0_8: # %for.body +; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1 +; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX2-NEXT: shll %cl, (%rdi,%rdx,4) +; AVX2-NEXT: incq %rdx +; AVX2-NEXT: cmpq %rdx, %rax +; AVX2-NEXT: je .LBB0_9 ; AVX2-NEXT: .LBB0_6: # %for.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX2-NEXT: cmpb $0, (%rsi,%rdx) @@ -326,16 +338,7 @@ define void @vector_variable_shift_left_loop(i32* 
nocapture %arr, i8* nocapture ; AVX2-NEXT: # %bb.7: # %for.body ; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1 ; AVX2-NEXT: movl %r8d, %ecx -; AVX2-NEXT: .LBB0_8: # %for.body -; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1 -; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; AVX2-NEXT: shll %cl, (%rdi,%rdx,4) -; AVX2-NEXT: incq %rdx -; AVX2-NEXT: cmpq %rdx, %rax -; AVX2-NEXT: jne .LBB0_6 -; AVX2-NEXT: .LBB0_9: # %for.cond.cleanup -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX2-NEXT: jmp .LBB0_8 entry: %cmp12 = icmp sgt i32 %count, 0 br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/X86/widen_arith-1.ll b/llvm/test/CodeGen/X86/widen_arith-1.ll index dd606bed523..a37dba14f22 100644 --- a/llvm/test/CodeGen/X86/widen_arith-1.ll +++ b/llvm/test/CodeGen/X86/widen_arith-1.ll @@ -7,9 +7,13 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl (%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -19,12 +23,8 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { ; CHECK-NEXT: pextrb $2, %xmm1, 2(%ecx,%eax,4) ; CHECK-NEXT: pextrw $0, %xmm1, (%ecx,%eax,4) ; CHECK-NEXT: incl (%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl (%esp), %eax -; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: popl %eax ; CHECK-NEXT: retl entry: diff --git 
a/llvm/test/CodeGen/X86/widen_arith-2.ll b/llvm/test/CodeGen/X86/widen_arith-2.ll index 7b1fcbb4f1b..cf76f66dad2 100644 --- a/llvm/test/CodeGen/X86/widen_arith-2.ll +++ b/llvm/test/CodeGen/X86/widen_arith-2.ll @@ -10,9 +10,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <4,4,4,4,4,4,4,4,u,u,u,u,u,u,u,u> -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl (%esp), %eax ; CHECK-NEXT: leal (,%eax,8), %ecx @@ -26,12 +30,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; CHECK-NEXT: pand %xmm1, %xmm2 ; CHECK-NEXT: movq %xmm2, (%edx,%eax,8) ; CHECK-NEXT: incl (%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl (%esp), %eax -; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll index ca86beda5c1..5b944beffde 100644 --- a/llvm/test/CodeGen/X86/widen_arith-3.ll +++ b/llvm/test/CodeGen/X86/widen_arith-3.ll @@ -17,9 +17,13 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $65537, {{[0-9]+}}(%esp) # imm = 0x10001 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop 
Header: Depth=1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl 16(%ebp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl 12(%ebp), %edx @@ -30,12 +34,8 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: pextrw $2, %xmm1, 4(%ecx,%eax,8) ; CHECK-NEXT: movd %xmm1, (%ecx,%eax,8) ; CHECK-NEXT: incl {{[0-9]+}}(%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cmpl 16(%ebp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/widen_arith-4.ll b/llvm/test/CodeGen/X86/widen_arith-4.ll index f25e73ef2a0..490783ef657 100644 --- a/llvm/test/CodeGen/X86/widen_arith-4.ll +++ b/llvm/test/CodeGen/X86/widen_arith-4.ll @@ -16,9 +16,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE2-NEXT: movl $0, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u> ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <2,4,2,2,2,u,u,u> -; SSE2-NEXT: jmp .LBB0_1 ; SSE2-NEXT: .p2align 4, 0x90 -; SSE2-NEXT: .LBB0_2: # %forbody +; SSE2-NEXT: .LBB0_1: # %forcond +; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; SSE2-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; SSE2-NEXT: jge .LBB0_3 +; SSE2-NEXT: # %bb.2: # %forbody ; SSE2-NEXT: # in Loop: Header=BB0_1 Depth=1 ; SSE2-NEXT: movslq -{{[0-9]+}}(%rsp), %rax ; SSE2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx @@ -31,12 +35,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE2-NEXT: pextrw $4, %xmm2, %edx ; SSE2-NEXT: movw %dx, 8(%rcx,%rax) ; SSE2-NEXT: incl -{{[0-9]+}}(%rsp) -; SSE2-NEXT: .LBB0_1: 
# %forcond -; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: jl .LBB0_2 -; SSE2-NEXT: # %bb.3: # %afterfor +; SSE2-NEXT: jmp .LBB0_1 +; SSE2-NEXT: .LBB0_3: # %afterfor ; SSE2-NEXT: retq ; ; SSE41-LABEL: update: @@ -49,9 +49,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE41-NEXT: movw $0, -{{[0-9]+}}(%rsp) ; SSE41-NEXT: movl $0, -{{[0-9]+}}(%rsp) ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u> -; SSE41-NEXT: jmp .LBB0_1 ; SSE41-NEXT: .p2align 4, 0x90 -; SSE41-NEXT: .LBB0_2: # %forbody +; SSE41-NEXT: .LBB0_1: # %forcond +; SSE41-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE41-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; SSE41-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; SSE41-NEXT: jge .LBB0_3 +; SSE41-NEXT: # %bb.2: # %forbody ; SSE41-NEXT: # in Loop: Header=BB0_1 Depth=1 ; SSE41-NEXT: movslq -{{[0-9]+}}(%rsp), %rax ; SSE41-NEXT: movq -{{[0-9]+}}(%rsp), %rcx @@ -66,12 +70,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE41-NEXT: pextrw $4, %xmm1, 8(%rcx,%rax) ; SSE41-NEXT: movq %xmm2, (%rcx,%rax) ; SSE41-NEXT: incl -{{[0-9]+}}(%rsp) -; SSE41-NEXT: .LBB0_1: # %forcond -; SSE41-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE41-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; SSE41-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax -; SSE41-NEXT: jl .LBB0_2 -; SSE41-NEXT: # %bb.3: # %afterfor +; SSE41-NEXT: jmp .LBB0_1 +; SSE41-NEXT: .LBB0_3: # %afterfor ; SSE41-NEXT: retq entry: %dst.addr = alloca <5 x i16>* diff --git a/llvm/test/CodeGen/X86/widen_arith-5.ll b/llvm/test/CodeGen/X86/widen_arith-5.ll index 6e486bb2ace..2c705faed96 100644 --- a/llvm/test/CodeGen/X86/widen_arith-5.ll +++ b/llvm/test/CodeGen/X86/widen_arith-5.ll @@ -14,9 +14,13 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl $1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movdqa 
{{.*#+}} xmm0 = <3,3,3,u> -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx @@ -28,12 +32,8 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind { ; CHECK-NEXT: pextrd $2, %xmm1, 8(%rcx,%rax) ; CHECK-NEXT: movq %xmm1, (%rcx,%rax) ; CHECK-NEXT: incl -{{[0-9]+}}(%rsp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: retq entry: %dst.addr = alloca <3 x i32>* diff --git a/llvm/test/CodeGen/X86/widen_arith-6.ll b/llvm/test/CodeGen/X86/widen_arith-6.ll index c039096604e..3b24cb0194e 100644 --- a/llvm/test/CodeGen/X86/widen_arith-6.ll +++ b/llvm/test/CodeGen/X86/widen_arith-6.ll @@ -15,9 +15,13 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl $1065353216, {{[0-9]+}}(%esp) # imm = 0x3F800000 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1.97604004E+3,1.97604004E+3,1.97604004E+3,u> -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl 16(%ebp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl 8(%ebp), 
%ecx @@ -30,12 +34,8 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind { ; CHECK-NEXT: extractps $1, %xmm1, 4(%ecx,%eax) ; CHECK-NEXT: movss %xmm1, (%ecx,%eax) ; CHECK-NEXT: incl {{[0-9]+}}(%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cmpl 16(%ebp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/widen_cast-4.ll b/llvm/test/CodeGen/X86/widen_cast-4.ll index 9a2304ff467..f317d4b5913 100644 --- a/llvm/test/CodeGen/X86/widen_cast-4.ll +++ b/llvm/test/CodeGen/X86/widen_cast-4.ll @@ -11,9 +11,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; WIDE-NEXT: pcmpeqd %xmm0, %xmm0 ; WIDE-NEXT: movdqa {{.*#+}} xmm1 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] ; WIDE-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32] -; WIDE-NEXT: jmp .LBB0_1 ; WIDE-NEXT: .p2align 4, 0x90 -; WIDE-NEXT: .LBB0_2: # %forbody +; WIDE-NEXT: .LBB0_1: # %forcond +; WIDE-NEXT: # =>This Inner Loop Header: Depth=1 +; WIDE-NEXT: movl (%esp), %eax +; WIDE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; WIDE-NEXT: jge .LBB0_3 +; WIDE-NEXT: # %bb.2: # %forbody ; WIDE-NEXT: # in Loop: Header=BB0_1 Depth=1 ; WIDE-NEXT: movl (%esp), %eax ; WIDE-NEXT: leal (,%eax,8), %ecx @@ -30,12 +34,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; WIDE-NEXT: psubb %xmm2, %xmm3 ; WIDE-NEXT: movq %xmm3, (%edx,%eax,8) ; WIDE-NEXT: incl (%esp) -; WIDE-NEXT: .LBB0_1: # %forcond -; WIDE-NEXT: # =>This Inner Loop Header: Depth=1 -; WIDE-NEXT: movl (%esp), %eax -; WIDE-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; WIDE-NEXT: jl .LBB0_2 -; WIDE-NEXT: # %bb.3: # %afterfor +; WIDE-NEXT: jmp .LBB0_1 +; WIDE-NEXT: .LBB0_3: # %afterfor ; WIDE-NEXT: addl $12, %esp ; 
WIDE-NEXT: retl entry: diff --git a/llvm/test/DebugInfo/X86/PR37234.ll b/llvm/test/DebugInfo/X86/PR37234.ll index 6f7388755ee..a0c8b91d624 100644 --- a/llvm/test/DebugInfo/X86/PR37234.ll +++ b/llvm/test/DebugInfo/X86/PR37234.ll @@ -21,18 +21,18 @@ ; CHECK-LABEL: # %bb.{{.*}}: ; CHECK: #DEBUG_VALUE: main:aa <- 0 ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG:[0-9a-z]+]] -; CHECK: jmp .LBB0_1 -; CHECK: .LBB0_2: +; CHECK: .LBB0_1: +; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] +; CHECK: je .LBB0_4 +; CHECK: # %bb.{{.*}}: ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] ; CHECK: jne .LBB0_1 ; CHECK: # %bb.{{.*}}: ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] ; CHECK: incl %[[REG]] ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] -; CHECK: .LBB0_1: -; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] -; CHECK: jne .LBB0_2 -; CHECK: # %bb.{{.*}}: +; CHECK: jmp .LBB0_1 +; CHECK: .LBB0_4: ; CHECK: #DEBUG_VALUE: main:aa <- $[[REG]] ; CHECK: retq diff --git a/llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll b/llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll index 50503fc2711..37eaffdc146 100644 --- a/llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll +++ b/llvm/test/DebugInfo/X86/dbg-value-transfer-order.ll @@ -24,6 +24,12 @@ ; with the Orders insertion point vector. ; CHECK-LABEL: f: # @f +; CHECK: .LBB0_4: +; Check that this DEBUG_VALUE comes before the left shift. +; CHECK: #DEBUG_VALUE: bit_offset <- $ecx +; CHECK: .cv_loc 0 1 8 28 # t.c:8:28 +; CHECK: movl $1, %[[reg:[^ ]*]] +; CHECK: shll %cl, %[[reg]] ; CHECK: .LBB0_2: # %while.body ; CHECK: movl $32, %ecx ; CHECK: testl {{.*}} @@ -31,12 +37,7 @@ ; CHECK: # %bb.3: # %if.then ; CHECK: callq if_then ; CHECK: movl %eax, %ecx -; CHECK: .LBB0_4: # %if.end -; Check that this DEBUG_VALUE comes before the left shift. -; CHECK: #DEBUG_VALUE: bit_offset <- $ecx -; CHECK: .cv_loc 0 1 8 28 # t.c:8:28 -; CHECK: movl $1, %[[reg:[^ ]*]] -; CHECK: shll %cl, %[[reg]] +; CHECK: jmp .LBB0_4 ; ModuleID = 't.c' source_filename = "t.c" |