author    Nicolai Hähnle <nicolai.haehnle@amd.com>  2020-01-21 09:17:25 +0100
committer Hans Wennborg <hans@chromium.org>         2020-02-03 16:00:00 +0100
commit    94c79ce5740f69aa9a9f5145c9911a61b7d20662 (patch)
tree      ea5a6547d78dc072a72a5f054867c2f16c119b57
parent    4e9209ab592eb6dfbf076ba00aa8e1354cbc225a (diff)
Revert "[AMDGPU] Invert the handling of skip insertion."
This reverts commit 0dc6c249bffac9f23a605ce4e42a84341da3ddbd.

The commit is reported to cause a regression in piglit/bin/glsl-vs-loop for
Mesa.

(cherry picked from commit a80291ce10ba9667352adcc895f9668144f5f616)
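For context: on AMDGPU, divergent control flow is lowered by masking lanes in
the EXEC register, and a "skip" branch lets the whole wave jump over a region
in which no lane is active. A minimal sketch of the pattern at issue, adapted
from the divergent-control-flow.ll output further down:

    v_cmp_ne_u32_e32   vcc, 0, v0        ; per-lane condition
    s_and_saveexec_b64 s[4:5], vcc       ; exec &= vcc, old mask saved in s[4:5]
    s_cbranch_execz    BB0_2             ; skip the then-block if exec == 0
; %bb.1:                                 ; divergent then-block
    global_load_dword  v0, v[0:1], off
BB0_2:                                   ; reconvergence point
    s_or_b64           exec, exec, s[4:5]

The reverted commit had SILowerControlFlow emit the s_cbranch_execz directly
and used a separate pass (SIRemoveShortExecBranches) to delete it when the
skipped region was short; this revert returns to emitting a SI_MASK_BRANCH
pseudo and letting SIInsertSkips decide whether a real skip pays off.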
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.h  3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp  2
-rw-r--r--  llvm/lib/Target/AMDGPU/CMakeLists.txt  1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertSkips.cpp  5
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp  10
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp  158
-rw-r--r--  llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll  11
-rw-r--r--  llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll  312
-rw-r--r--  llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/branch-condition-and.ll  5
-rw-r--r--  llvm/test/CodeGen/AMDGPU/branch-relaxation.ll  9
-rw-r--r--  llvm/test/CodeGen/AMDGPU/call-skip.ll  9
-rw-r--r--  llvm/test/CodeGen/AMDGPU/collapse-endcf.ll  49
-rw-r--r--  llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll  15
-rw-r--r--  llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll  8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll  11
-rw-r--r--  llvm/test/CodeGen/AMDGPU/else.ll  3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/hoist-cond.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll  6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll  3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/ret_jump.ll  23
-rw-r--r--  llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll  10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll  7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/skip-if-dead.ll  13
-rw-r--r--  llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll  3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll  5
-rw-r--r--  llvm/test/CodeGen/AMDGPU/uniform-cfg.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll  2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/valu-i1.ll  42
-rw-r--r--  llvm/test/CodeGen/AMDGPU/wave32.ll  16
-rw-r--r--  llvm/test/CodeGen/AMDGPU/wqm.ll  5
40 files changed, 380 insertions, 390 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index fbed51de0ea..a55a1747caf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -156,9 +156,6 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;
-void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
-extern char &SIRemoveShortExecBranchesID;
-
void initializeSIInsertSkipsPass(PassRegistry &);
extern char &SIInsertSkipsPassID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index eb30d659bf0..c8dc6f6e3bf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -228,7 +228,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
- initializeSIRemoveShortExecBranchesPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
@@ -994,7 +993,6 @@ void GCNPassConfig::addPreEmitPass() {
// be better for it to emit S_NOP <N> when possible.
addPass(&PostRAHazardRecognizerID);
- addPass(&SIRemoveShortExecBranchesID);
addPass(&SIInsertSkipsPassID);
addPass(&BranchRelaxationPassID);
}
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 0b8eb4b25ae..3ed35e57e54 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -116,7 +116,6 @@ add_llvm_target(AMDGPUCodeGen
SIOptimizeExecMaskingPreRA.cpp
SIPeepholeSDWA.cpp
SIRegisterInfo.cpp
- SIRemoveShortExecBranches.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
GCNILPSched.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index 80c044ec00c..87e63fcc4a0 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
static cl::opt<unsigned> SkipThresholdFlag(
- "amdgpu-skip-threshold-legacy",
+ "amdgpu-skip-threshold",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
@@ -466,9 +466,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::S_CBRANCH_EXECZ:
- ExecBranchStack.push_back(MI.getOperand(0).getMBB());
- break;
case AMDGPU::SI_MASK_BRANCH:
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
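
The hunk above restores the option's original spelling: with the separate pass
gone, "amdgpu-skip-threshold" again belongs to SIInsertSkips, and the
"-legacy" suffix is dropped. The flag is cl::Hidden (default 12) and meant for
debugging; a sketch of how it could be exercised, with a hypothetical input
file name:

    llc -mtriple=amdgcn -amdgpu-skip-threshold=12 divergent-if.ll -o out.s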
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 61d2719a3aa..bf052dc3c93 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
- // Insert the S_CBRANCH_EXECZ instruction which will be optimized later
- // during SIRemoveShortExecBranches.
- MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ // Insert a pseudo terminator to help keep the verifier happy. This will also
+ // be used later when inserting skips.
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.add(MI.getOperand(2));
if (!LIS) {
@@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(DstReg);
MachineInstr *Branch =
- BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addMBB(DestBB);
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ .addMBB(DestBB);
if (!LIS) {
MI.eraseFromParent();
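
With this change SILowerControlFlow no longer commits to a real branch at
lowering time: SI_MASK_BRANCH is a pseudo terminator that only marks where a
skip could go, and SIInsertSkips later either expands it into a real skip or
leaves the fall-through alone. In the checked-in assembly this shows up as a
comment, optionally followed by the actual branch, as in the test updates
below:

    s_and_saveexec_b64 s[4:5], vcc
    ; mask branch BB0_2                  ; the SI_MASK_BRANCH pseudo
    s_cbranch_execz    BB0_2             ; real skip added by SIInsertSkips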
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
deleted file mode 100644
index 51779e97ac6..00000000000
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass optimizes the s_cbranch_execz instructions.
-/// The pass removes this skip instruction for short branches,
-/// if there is no unwanted side effect in the fallthrough code sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-remove-short-exec-branches"
-
-static unsigned SkipThreshold;
-
-static cl::opt<unsigned, true> SkipThresholdFlag(
- "amdgpu-skip-threshold", cl::Hidden,
- cl::desc(
- "Number of instructions before jumping over divergent control flow"),
- cl::location(SkipThreshold), cl::init(12));
-
-namespace {
-
-class SIRemoveShortExecBranches : public MachineFunctionPass {
-private:
- const SIInstrInfo *TII = nullptr;
- bool getBlockDestinations(MachineBasicBlock &SrcMBB,
- MachineBasicBlock *&TrueMBB,
- MachineBasicBlock *&FalseMBB,
- SmallVectorImpl<MachineOperand> &Cond);
- bool mustRetainExeczBranch(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const;
- bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
-
-public:
- static char ID;
-
- SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
- initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
- "SI remove short exec branches", false, false)
-
-char SIRemoveShortExecBranches::ID = 0;
-
-char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
-
-bool SIRemoveShortExecBranches::getBlockDestinations(
- MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
- MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
- if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
- return false;
-
- if (!FalseMBB)
- FalseMBB = SrcMBB.getNextNode();
-
- return true;
-}
-
-bool SIRemoveShortExecBranches::mustRetainExeczBranch(
- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
-
- for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it become infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
- return true;
-
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
- return true;
-
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- I->getOpcode() == AMDGPU::S_WAITCNT)
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
- return false;
-}
-
-// Returns true if the skip branch instruction is removed.
-bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
- MachineBasicBlock &SrcMBB) {
- MachineBasicBlock *TrueMBB = nullptr;
- MachineBasicBlock *FalseMBB = nullptr;
- SmallVector<MachineOperand, 1> Cond;
-
- if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
- return false;
-
- // Consider only the forward branches.
- if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
- mustRetainExeczBranch(*FalseMBB, *TrueMBB))
- return false;
-
- LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
- MI.eraseFromParent();
- SrcMBB.removeSuccessor(TrueMBB);
-
- return true;
-}
-
-bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- TII = ST.getInstrInfo();
- MF.RenumberBlocks();
- bool Changed = false;
-
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- if (MBBI == MBB.end())
- continue;
-
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- case AMDGPU::S_CBRANCH_EXECZ:
- Changed = removeExeczBranch(MI, MBB);
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
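
The deleted pass implemented the inverted scheme: SILowerControlFlow emitted a
real s_cbranch_execz, and this pass erased it again whenever the skipped
region was shorter than SkipThreshold instructions and safe to execute with
exec = 0, i.e. free of SMRD/VMEM/FLAT accesses, s_waitcnt, VCC branches, and
anything with unwanted effects when EXEC is empty. A hypothetical
before/after, assuming a single cheap VALU instruction in the region:

  ; before SIRemoveShortExecBranches:
    s_and_saveexec_b64 s[4:5], vcc
    s_cbranch_execz    BB0_2
  ; %bb.1:
    v_add_u32_e32      v0, 1, v0
  BB0_2:
    s_or_b64           exec, exec, s[4:5]

  ; after: falling through one masked VALU op is cheaper than a taken
  ; branch, so the skip is deleted
    s_and_saveexec_b64 s[4:5], vcc
  ; %bb.1:
    v_add_u32_e32      v0, 1, v0
  BB0_2:
    s_or_b64           exec, exec, s[4:5]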
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index d787e40707b..40e18206702 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -10,8 +10,9 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; mask branch BB0_2
; CHECK-NEXT: s_cbranch_execz BB0_2
-; CHECK-NEXT: ; %bb.1: ; %if.true
+; CHECK-NEXT: BB0_1: ; %if.true
; CHECK-NEXT: global_load_dword v0, v[0:1], off
; CHECK-NEXT: BB0_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -37,10 +38,12 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; CHECK-NEXT: s_cbranch_execnz BB1_2
-; CHECK-NEXT: ; %bb.1: ; %if.true
+; CHECK-NEXT: ; mask branch BB1_2
+; CHECK-NEXT: BB1_1: ; %endif
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: BB1_2: ; %if.true
; CHECK-NEXT: global_load_dword v0, v[0:1], off
-; CHECK-NEXT: BB1_2: ; %endif
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 84d39acb17f..2fec729a3da 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -24,8 +24,9 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB0_2
; GFX7LESS-NEXT: s_cbranch_execz BB0_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB0_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX7LESS-NEXT: v_mov_b32_e32 v1, local_var32@abs32@lo
; GFX7LESS-NEXT: v_mul_u32_u24_e64 v2, s4, 5
@@ -53,8 +54,9 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB0_2
; GFX8-NEXT: s_cbranch_execz BB0_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB0_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8-NEXT: v_mul_u32_u24_e64 v1, s4, 5
; GFX8-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
@@ -83,8 +85,9 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB0_2
; GFX9-NEXT: s_cbranch_execz BB0_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB0_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX9-NEXT: v_mul_u32_u24_e64 v1, s4, 5
; GFX9-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
@@ -112,8 +115,9 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s3, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB0_2
; GFX1064-NEXT: s_cbranch_execz BB0_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB0_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
; GFX1064-NEXT: v_mul_u32_u24_e64 v1, s2, 5
@@ -144,8 +148,9 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB0_2
; GFX1032-NEXT: s_cbranch_execz BB0_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB0_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
; GFX1032-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
; GFX1032-NEXT: v_mul_u32_u24_e64 v1, s3, 5
@@ -185,8 +190,9 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX7LESS-NEXT: ; mask branch BB1_2
; GFX7LESS-NEXT: s_cbranch_execz BB1_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB1_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s3, s[6:7]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_mul_i32 s3, s2, s3
@@ -218,8 +224,9 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB1_2
; GFX8-NEXT: s_cbranch_execz BB1_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB1_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s1, s[6:7]
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mul_i32 s1, s0, s1
@@ -251,8 +258,9 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB1_2
; GFX9-NEXT: s_cbranch_execz BB1_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB1_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s1, s[6:7]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mul_i32 s1, s0, s1
@@ -283,8 +291,9 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s3, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GFX1064-NEXT: ; mask branch BB1_2
; GFX1064-NEXT: s_cbranch_execz BB1_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB1_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v1, local_var32@abs32@lo
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -318,8 +327,9 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1032-NEXT: ; mask branch BB1_2
; GFX1032-NEXT: s_cbranch_execz BB1_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB1_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1032-NEXT: v_mov_b32_e32 v1, local_var32@abs32@lo
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -405,8 +415,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB2_2
; GFX8-NEXT: s_cbranch_execz BB2_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB2_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -458,8 +469,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB2_2
; GFX9-NEXT: s_cbranch_execz BB2_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB2_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -515,8 +527,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB2_2
; GFX1064-NEXT: s_cbranch_execz BB2_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB2_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -567,8 +580,9 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB2_2
; GFX1032-NEXT: s_cbranch_execz BB2_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB2_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -644,8 +658,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB3_2
; GFX8-NEXT: s_cbranch_execz BB3_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB3_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -697,8 +712,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB3_2
; GFX9-NEXT: s_cbranch_execz BB3_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB3_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -754,8 +770,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB3_2
; GFX1064-NEXT: s_cbranch_execz BB3_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB3_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -806,8 +823,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB3_2
; GFX1032-NEXT: s_cbranch_execz BB3_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB3_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -883,8 +901,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB4_2
; GFX8-NEXT: s_cbranch_execz BB4_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB4_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -936,8 +955,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB4_2
; GFX9-NEXT: s_cbranch_execz BB4_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB4_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -993,8 +1013,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB4_2
; GFX1064-NEXT: s_cbranch_execz BB4_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB4_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1045,8 +1066,9 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB4_2
; GFX1032-NEXT: s_cbranch_execz BB4_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB4_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -1085,8 +1107,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB5_2
; GFX7LESS-NEXT: s_cbranch_execz BB5_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB5_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
@@ -1120,8 +1143,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB5_2
; GFX8-NEXT: s_cbranch_execz BB5_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB5_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX8-NEXT: v_mul_u32_u24_e64 v1, s4, 5
@@ -1154,8 +1178,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB5_2
; GFX9-NEXT: s_cbranch_execz BB5_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB5_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX9-NEXT: v_mul_u32_u24_e64 v1, s4, 5
@@ -1187,8 +1212,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s5, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX1064-NEXT: ; mask branch BB5_2
; GFX1064-NEXT: s_cbranch_execz BB5_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB5_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1064-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
@@ -1221,8 +1247,9 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB5_2
; GFX1032-NEXT: s_cbranch_execz BB5_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB5_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1032-NEXT: v_mul_hi_u32_u24_e64 v2, s3, 5
@@ -1263,8 +1290,9 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX7LESS-NEXT: ; mask branch BB6_2
; GFX7LESS-NEXT: s_cbranch_execz BB6_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB6_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
@@ -1307,8 +1335,9 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB6_2
; GFX8-NEXT: s_cbranch_execz BB6_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB6_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX8-NEXT: v_mov_b32_e32 v1, s6
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -1351,8 +1380,9 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB6_2
; GFX9-NEXT: s_cbranch_execz BB6_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB6_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX9-NEXT: v_mov_b32_e32 v1, s6
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -1394,8 +1424,9 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB6_2
; GFX1064-NEXT: s_cbranch_execz BB6_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB6_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -1436,8 +1467,9 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB6_2
; GFX1032-NEXT: s_cbranch_execz BB6_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB6_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s5, s5
; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -1576,8 +1608,9 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB8_2
; GFX7LESS-NEXT: s_cbranch_execz BB8_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB8_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX7LESS-NEXT: v_mov_b32_e32 v1, local_var32@abs32@lo
; GFX7LESS-NEXT: v_mul_u32_u24_e64 v2, s4, 5
@@ -1606,8 +1639,9 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB8_2
; GFX8-NEXT: s_cbranch_execz BB8_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB8_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8-NEXT: v_mul_u32_u24_e64 v1, s4, 5
; GFX8-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
@@ -1637,8 +1671,9 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB8_2
; GFX9-NEXT: s_cbranch_execz BB8_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB8_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX9-NEXT: v_mul_u32_u24_e64 v1, s4, 5
; GFX9-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
@@ -1667,8 +1702,9 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s3, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB8_2
; GFX1064-NEXT: s_cbranch_execz BB8_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB8_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
; GFX1064-NEXT: v_mul_u32_u24_e64 v1, s2, 5
@@ -1700,8 +1736,9 @@ define amdgpu_kernel void @sub_i32_constant(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB8_2
; GFX1032-NEXT: s_cbranch_execz BB8_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB8_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
; GFX1032-NEXT: v_mov_b32_e32 v2, local_var32@abs32@lo
; GFX1032-NEXT: v_mul_u32_u24_e64 v1, s3, 5
@@ -1742,8 +1779,9 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[0:1], vcc
+; GFX7LESS-NEXT: ; mask branch BB9_2
; GFX7LESS-NEXT: s_cbranch_execz BB9_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB9_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s3, s[6:7]
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
; GFX7LESS-NEXT: s_mul_i32 s3, s2, s3
@@ -1775,8 +1813,9 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB9_2
; GFX8-NEXT: s_cbranch_execz BB9_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB9_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s1, s[6:7]
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_mul_i32 s1, s0, s1
@@ -1808,8 +1847,9 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB9_2
; GFX9-NEXT: s_cbranch_execz BB9_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB9_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s1, s[6:7]
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mul_i32 s1, s0, s1
@@ -1840,8 +1880,9 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s3, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[6:7], vcc
+; GFX1064-NEXT: ; mask branch BB9_2
; GFX1064-NEXT: s_cbranch_execz BB9_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB9_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
; GFX1064-NEXT: v_mov_b32_e32 v1, local_var32@abs32@lo
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -1875,8 +1916,9 @@ define amdgpu_kernel void @sub_i32_uniform(i32 addrspace(1)* %out, i32 %subitive
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s2, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; GFX1032-NEXT: ; mask branch BB9_2
; GFX1032-NEXT: s_cbranch_execz BB9_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB9_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s2, s2
; GFX1032-NEXT: v_mov_b32_e32 v1, local_var32@abs32@lo
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -1962,8 +2004,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB10_2
; GFX8-NEXT: s_cbranch_execz BB10_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB10_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -2015,8 +2058,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB10_2
; GFX9-NEXT: s_cbranch_execz BB10_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB10_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2072,8 +2116,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB10_2
; GFX1064-NEXT: s_cbranch_execz BB10_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB10_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2124,8 +2169,9 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB10_2
; GFX1032-NEXT: s_cbranch_execz BB10_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB10_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2164,8 +2210,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB11_2
; GFX7LESS-NEXT: s_cbranch_execz BB11_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB11_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX7LESS-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
@@ -2199,8 +2246,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB11_2
; GFX8-NEXT: s_cbranch_execz BB11_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB11_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX8-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX8-NEXT: v_mul_u32_u24_e64 v1, s4, 5
@@ -2234,8 +2282,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB11_2
; GFX9-NEXT: s_cbranch_execz BB11_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB11_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX9-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
; GFX9-NEXT: v_mul_u32_u24_e64 v1, s4, 5
@@ -2268,8 +2317,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s5, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX1064-NEXT: ; mask branch BB11_2
; GFX1064-NEXT: s_cbranch_execz BB11_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB11_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1064-NEXT: v_mul_hi_u32_u24_e64 v2, s4, 5
@@ -2304,8 +2354,9 @@ define amdgpu_kernel void @sub_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s3, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB11_2
; GFX1032-NEXT: s_cbranch_execz BB11_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB11_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s3, s3
; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1032-NEXT: v_mul_hi_u32_u24_e64 v2, s3, 5
@@ -2348,8 +2399,9 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX7LESS-NEXT: ; mask branch BB12_2
; GFX7LESS-NEXT: s_cbranch_execz BB12_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB12_1:
; GFX7LESS-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX7LESS-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0)
@@ -2392,8 +2444,9 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB12_2
; GFX8-NEXT: s_cbranch_execz BB12_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB12_1:
; GFX8-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX8-NEXT: v_mov_b32_e32 v1, s6
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
@@ -2436,8 +2489,9 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr1_vgpr2
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB12_2
; GFX9-NEXT: s_cbranch_execz BB12_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB12_1:
; GFX9-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX9-NEXT: v_mov_b32_e32 v1, s6
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -2479,8 +2533,9 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
; GFX1064-NEXT: v_mbcnt_hi_u32_b32_e64 v0, s7, v0
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB12_2
; GFX1064-NEXT: s_cbranch_execz BB12_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB12_1:
; GFX1064-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; GFX1064-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
@@ -2521,8 +2576,9 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
; GFX1032-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s5, 0
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB12_2
; GFX1032-NEXT: s_cbranch_execz BB12_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB12_1:
; GFX1032-NEXT: s_bcnt1_i32_b32 s5, s5
; GFX1032-NEXT: v_mov_b32_e32 v3, local_var64@abs32@lo
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
@@ -2701,8 +2757,9 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB14_2
; GFX8-NEXT: s_cbranch_execz BB14_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB14_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -2754,8 +2811,9 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB14_2
; GFX9-NEXT: s_cbranch_execz BB14_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB14_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2811,8 +2869,9 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB14_2
; GFX1064-NEXT: s_cbranch_execz BB14_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB14_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2863,8 +2922,9 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB14_2
; GFX1032-NEXT: s_cbranch_execz BB14_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB14_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -2943,8 +3003,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB15_2
; GFX8-NEXT: s_cbranch_execz BB15_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB15_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -2996,8 +3057,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB15_2
; GFX9-NEXT: s_cbranch_execz BB15_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB15_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3053,8 +3115,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB15_2
; GFX1064-NEXT: s_cbranch_execz BB15_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB15_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3105,8 +3168,9 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB15_2
; GFX1032-NEXT: s_cbranch_execz BB15_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB15_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3185,8 +3249,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB16_2
; GFX8-NEXT: s_cbranch_execz BB16_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB16_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -3238,8 +3303,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB16_2
; GFX9-NEXT: s_cbranch_execz BB16_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB16_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3295,8 +3361,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB16_2
; GFX1064-NEXT: s_cbranch_execz BB16_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB16_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3347,8 +3414,9 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB16_2
; GFX1032-NEXT: s_cbranch_execz BB16_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB16_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3427,8 +3495,9 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB17_2
; GFX8-NEXT: s_cbranch_execz BB17_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB17_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -3480,8 +3549,9 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB17_2
; GFX9-NEXT: s_cbranch_execz BB17_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB17_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3537,8 +3607,9 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB17_2
; GFX1064-NEXT: s_cbranch_execz BB17_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB17_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3589,8 +3660,9 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB17_2
; GFX1032-NEXT: s_cbranch_execz BB17_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB17_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3629,8 +3701,9 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB18_2
; GFX7LESS-NEXT: s_cbranch_execz BB18_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB18_1:
; GFX7LESS-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
@@ -3666,8 +3739,9 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB18_2
; GFX8-NEXT: s_cbranch_execz BB18_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB18_1:
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v1, 0
@@ -3703,8 +3777,9 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB18_2
; GFX9-NEXT: s_cbranch_execz BB18_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB18_1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v1, 0
@@ -3739,8 +3814,9 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX1064-NEXT: ; mask branch BB18_2
; GFX1064-NEXT: s_cbranch_execz BB18_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB18_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
@@ -3775,8 +3851,9 @@ define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB18_2
; GFX1032-NEXT: s_cbranch_execz BB18_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB18_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
@@ -3859,8 +3936,9 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB19_2
; GFX8-NEXT: s_cbranch_execz BB19_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB19_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -3912,8 +3990,9 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB19_2
; GFX9-NEXT: s_cbranch_execz BB19_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB19_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -3969,8 +4048,9 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB19_2
; GFX1064-NEXT: s_cbranch_execz BB19_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB19_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4021,8 +4101,9 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB19_2
; GFX1032-NEXT: s_cbranch_execz BB19_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB19_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4061,8 +4142,9 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB20_2
; GFX7LESS-NEXT: s_cbranch_execz BB20_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB20_1:
; GFX7LESS-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
@@ -4098,8 +4180,9 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB20_2
; GFX8-NEXT: s_cbranch_execz BB20_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB20_1:
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v1, 0
@@ -4135,8 +4218,9 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB20_2
; GFX9-NEXT: s_cbranch_execz BB20_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB20_1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v1, 0
@@ -4171,8 +4255,9 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX1064-NEXT: ; mask branch BB20_2
; GFX1064-NEXT: s_cbranch_execz BB20_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB20_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
@@ -4207,8 +4292,9 @@ define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB20_2
; GFX1032-NEXT: s_cbranch_execz BB20_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB20_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
@@ -4291,8 +4377,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB21_2
; GFX8-NEXT: s_cbranch_execz BB21_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB21_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -4344,8 +4431,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB21_2
; GFX9-NEXT: s_cbranch_execz BB21_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB21_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4401,8 +4489,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB21_2
; GFX1064-NEXT: s_cbranch_execz BB21_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB21_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4453,8 +4542,9 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB21_2
; GFX1032-NEXT: s_cbranch_execz BB21_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB21_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4493,8 +4583,9 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB22_2
; GFX7LESS-NEXT: s_cbranch_execz BB22_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB22_1:
; GFX7LESS-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
@@ -4529,8 +4620,9 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB22_2
; GFX8-NEXT: s_cbranch_execz BB22_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB22_1:
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v1, 0
@@ -4565,8 +4657,9 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB22_2
; GFX9-NEXT: s_cbranch_execz BB22_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB22_1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v1, 0
@@ -4600,8 +4693,9 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX1064-NEXT: ; mask branch BB22_2
; GFX1064-NEXT: s_cbranch_execz BB22_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB22_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
@@ -4636,8 +4730,9 @@ define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB22_2
; GFX1032-NEXT: s_cbranch_execz BB22_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB22_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
@@ -4720,8 +4815,9 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX8-NEXT: ; implicit-def: $vgpr0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX8-NEXT: ; mask branch BB23_2
; GFX8-NEXT: s_cbranch_execz BB23_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB23_1:
; GFX8-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v3, s2
; GFX8-NEXT: s_mov_b32 m0, -1
@@ -4773,8 +4869,9 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: ; implicit-def: $vgpr0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT: ; mask branch BB23_2
; GFX9-NEXT: s_cbranch_execz BB23_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB23_1:
; GFX9-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v3, s2
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4830,8 +4927,9 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
; GFX1064-NEXT: ; implicit-def: $vgpr0
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX1064-NEXT: ; mask branch BB23_2
; GFX1064-NEXT: s_cbranch_execz BB23_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB23_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v7, s3
; GFX1064-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4882,8 +4980,9 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v4
; GFX1032-NEXT: ; implicit-def: $vgpr0
; GFX1032-NEXT: s_and_saveexec_b32 s4, vcc_lo
+; GFX1032-NEXT: ; mask branch BB23_2
; GFX1032-NEXT: s_cbranch_execz BB23_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB23_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, local_var32@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v7, s3
; GFX1032-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -4922,8 +5021,9 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7LESS-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX7LESS-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX7LESS-NEXT: ; mask branch BB24_2
; GFX7LESS-NEXT: s_cbranch_execz BB24_2
-; GFX7LESS-NEXT: ; %bb.1:
+; GFX7LESS-NEXT: BB24_1:
; GFX7LESS-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX7LESS-NEXT: v_mov_b32_e32 v0, 5
; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0
@@ -4958,8 +5058,9 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX8-NEXT: ; mask branch BB24_2
; GFX8-NEXT: s_cbranch_execz BB24_2
-; GFX8-NEXT: ; %bb.1:
+; GFX8-NEXT: BB24_1:
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX8-NEXT: v_mov_b32_e32 v1, 0
@@ -4994,8 +5095,9 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX9-NEXT: ; mask branch BB24_2
; GFX9-NEXT: s_cbranch_execz BB24_2
-; GFX9-NEXT: ; %bb.1:
+; GFX9-NEXT: BB24_1:
; GFX9-NEXT: v_mov_b32_e32 v0, 5
; GFX9-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX9-NEXT: v_mov_b32_e32 v1, 0
@@ -5029,8 +5131,9 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX1064-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
+; GFX1064-NEXT: ; mask branch BB24_2
; GFX1064-NEXT: s_cbranch_execz BB24_2
-; GFX1064-NEXT: ; %bb.1:
+; GFX1064-NEXT: BB24_1:
; GFX1064-NEXT: v_mov_b32_e32 v0, 5
; GFX1064-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1064-NEXT: v_mov_b32_e32 v1, 0
@@ -5065,8 +5168,9 @@ define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX1032-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX1032-NEXT: s_and_saveexec_b32 s2, vcc_lo
+; GFX1032-NEXT: ; mask branch BB24_2
; GFX1032-NEXT: s_cbranch_execz BB24_2
-; GFX1032-NEXT: ; %bb.1:
+; GFX1032-NEXT: BB24_1:
; GFX1032-NEXT: v_mov_b32_e32 v0, 5
; GFX1032-NEXT: v_mov_b32_e32 v2, local_var64@abs32@lo
; GFX1032-NEXT: v_mov_b32_e32 v1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 39c2d997eb8..81d045b14d9 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -11,7 +11,7 @@ declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1)
; Show what the atomic optimization pass will do for raw buffers.
; GCN-LABEL: add_i32_constant:
-; %bb.{{[0-9]+}}:
+; GCN-LABEL: BB0_1:
; GCN32: v_cmp_ne_u32_e64 s[[exec_lo:[0-9]+]], 1, 0
; GCN64: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt:[0-9]+]], s[[exec_lo]], 0
diff --git a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
index 4f03978c515..d1fcd1547d5 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
@@ -14,11 +14,12 @@
; GCN-DAG: v_cmp_lt_f32_e32 vcc,
; GCN: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[OTHERCC]]
; GCN: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[AND]]
+; GCN: ; mask branch [[BB5:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %bb4
+; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %bb4
; GCN: ds_write_b32
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: [[BB5]]
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end
define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 1131f5f3c19..a2facaafb41 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -389,6 +389,7 @@ bb3:
; GCN-LABEL: {{^}}uniform_inside_divergent:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]]
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry
@@ -400,7 +401,7 @@ bb3:
; GCN-NEXT: [[IF]]: ; %if
; GCN: buffer_store_dword
; GCN: s_cmp_lg_u32
-; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_scc1 [[ENDIF]]
; GCN-NEXT: ; %bb.2: ; %if_uniform
; GCN: buffer_store_dword
@@ -437,10 +438,12 @@ endif:
; GCN: v_cmp_nlt_f32_e32 vcc
; GCN-NEXT: s_and_saveexec_b64 [[TEMP_MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN-NEXT: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[TEMP_MASK]]
+; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
-; GCN: BB{{[0-9]+_[0-9]+}}: ; %Flow
+; GCN: [[FLOW]]: ; %Flow
; GCN-NEXT: s_or_saveexec_b64 [[TEMP_MASK1:s\[[0-9]+:[0-9]+\]]], [[MASK]]
; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]]
+; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
; GCN: ;;#ASMSTART
@@ -451,7 +454,7 @@ endif:
; GCN: v_nop_e64
; GCN: v_nop_e64
; GCN: ;;#ASMEND
-; GCN: s_cbranch_vccz [[RET:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_vccz [[RET]]
; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index d99226ac45b..cd963df6c49 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -8,7 +8,8 @@ define hidden void @func() #1 {
; GCN-LABEL: {{^}}if_call:
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
; GCN: s_swappc_b64
; GCN: [[END]]:
define void @if_call(i32 %flag) #0 {
@@ -25,7 +26,8 @@ end:
; GCN-LABEL: {{^}}if_asm:
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
; GCN: ; sample asm
; GCN: [[END]]:
define void @if_asm(i32 %flag) #0 {
@@ -42,7 +44,8 @@ end:
; GCN-LABEL: {{^}}if_call_kernel:
; GCN: s_and_saveexec_b64
-; GCN-NEXT: s_cbranch_execz BB3_2
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
; GCN: s_swappc_b64
define amdgpu_kernel void @if_call_kernel() #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index bb00e67fca2..6a8456d99bc 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -3,10 +3,12 @@
; ALL-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
+; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN: s_and_b64 exec, exec, vcc
+; GCN-NEXT: ; mask branch [[ENDIF]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
@@ -41,10 +43,12 @@ bb.outer.end: ; preds = %bb.outer.then, %bb.
; ALL-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
@@ -84,16 +88,18 @@ bb.outer.end: ; preds = %bb.inner.then, %bb
; ALL-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
-; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[THEN_INNER]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
@@ -131,24 +137,28 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
; ALL-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
-; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[FLOW1]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: [[FLOW1]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]]
@@ -193,8 +203,9 @@ bb.outer.end:
; ALL-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
-; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}:
+; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF]]
+; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index f144ed263ff..15e807a3e02 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -35,9 +35,9 @@
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
-; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
+; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
-; GCN: ; %bb.{{[0-9]+}}: ; %if
+; GCN: {{^}}BB{{[0-9]+}}_1: ; %if
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32 [[LOAD1:v[0-9]+]]
; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
@@ -116,7 +116,8 @@ endif:
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
-; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
@@ -193,7 +194,8 @@ end:
; GCN: s_mov_b64 exec, [[CMP0]]
; FIXME: It makes no sense to put this skip here
-; GCN: s_cbranch_execz [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_execz [[FLOW]]
; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]]
; GCN: [[FLOW]]: ; %Flow
@@ -227,10 +229,11 @@ end:
; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}
-; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[ENDIF]]
-; GCN: ; %bb.{{[0-9]+}}: ; %if
+; GCN: BB{{[0-9]+}}_2: ; %if
; GCN: ds_read_b32
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
diff --git a/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
index 2ba4d0cf1d9..80907bf1c1b 100644
--- a/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
+++ b/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
@@ -4,8 +4,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() #0
; GCN-LABEL: {{^}}convergent_inlineasm:
; GCN: %bb.0:
; GCN: v_cmp_ne_u32_e64
-; GCN: s_cbranch_execz
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: ; mask branch
+; GCN: BB{{[0-9]+_[0-9]+}}:
define amdgpu_kernel void @convergent_inlineasm(i64 addrspace(1)* nocapture %arg) {
bb:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
@@ -23,9 +23,9 @@ bb5: ; preds = %bb3, %bb
}
; GCN-LABEL: {{^}}nonconvergent_inlineasm:
-; GCN: s_cbranch_execz
+; GCN: ; mask branch
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: BB{{[0-9]+_[0-9]+}}:
; GCN: v_cmp_ne_u32_e64
; GCN: BB{{[0-9]+_[0-9]+}}:
diff --git a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
index 5209b2bf7f3..6b3491b0c75 100644
--- a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
+++ b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
@@ -8,7 +8,7 @@
; CHECK: s_mov_b32 [[SREG:s[0-9]+]], 1.0
; CHECK: %bb.1:
; CHECK-NOT: v_mov_b32_e32 {{v[0-9]+}}, 1.0
-; CHECK: BB0_3:
+; CHECK: BB0_4:
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, [[SREG]]
define amdgpu_ps void @mov_opt(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 563b5dcead5..895539c00bc 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -32,6 +32,7 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_and_b64 s[8:9], s[8:9], exec
; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[2:3]
+; CHECK-NEXT: s_cbranch_execz BB0_6
; CHECK-NEXT: BB0_3: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
@@ -43,19 +44,21 @@ define amdgpu_ps void @main(i32, float) {
; CHECK-NEXT: s_mov_b64 s[6:7], -1
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], vcc
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
+; CHECK-NEXT: ; mask branch BB0_1
; CHECK-NEXT: s_cbranch_execz BB0_1
-; CHECK-NEXT: ; %bb.5: ; %endif2
+; CHECK-NEXT: BB0_5: ; %endif2
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: s_add_i32 s0, s0, 1
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
; CHECK-NEXT: s_branch BB0_1
-; CHECK-NEXT: ; %bb.6: ; %Flow2
+; CHECK-NEXT: BB0_6: ; %Flow2
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[4:5]
-; CHECK-NEXT: ; %bb.7: ; %if1
+; CHECK-NEXT: ; mask branch BB0_8
+; CHECK-NEXT: BB0_7: ; %if1
; CHECK-NEXT: v_sqrt_f32_e32 v1, v0
-; CHECK-NEXT: ; %bb.8: ; %endloop
+; CHECK-NEXT: BB0_8: ; %endloop
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
; CHECK-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index e641f4263bb..38c9379fe2d 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -5,6 +5,7 @@
; CHECK: ; %Flow
; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]],
; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]]
+; CHECK-NEXT: ; mask branch
define amdgpu_ps float @else_no_execfix(i32 %z, float %v) #0 {
main_body:
%cc = icmp sgt i32 %z, 5
@@ -31,7 +32,7 @@ end:
; CHECK-NEXT: s_and_b64 exec, exec, [[INIT_EXEC]]
; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
-; CHECK-NEXT: s_cbranch_execz
+; CHECK-NEXT: ; mask branch
define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) #0 {
main_body:
%cc = icmp sgt i32 %z, 5
diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
index 08936730fc3..76a26882987 100644
--- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
@@ -9,7 +9,7 @@
; CHECK-NOT: v_cmp
; CHECK_NOT: v_cndmask
; CHECK: s_and_saveexec_b64 s[{{[[0-9]+:[0-9]+}}], [[COND]]
-; CHECK: ; %bb.2:
+; CHECK: BB0_2:
define amdgpu_kernel void @hoist_cond(float addrspace(1)* nocapture %arg, float addrspace(1)* noalias nocapture readonly %arg1, i32 %arg3, i32 %arg4) {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir
index 76bb74d3a63..b305cfddb5a 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
index 11051198f23..c84372086fd 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
---
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
index a0c0a6f2052..7da59df5d80 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=2 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-skips -amdgpu-skip-threshold=2 %s -o - | FileCheck %s
---
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
index c8832caf616..bcc6e12d4ee 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold-legacy=1 %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-insert-skips -amdgpu-skip-threshold=1 %s -o - | FileCheck %s
# https://bugs.freedesktop.org/show_bug.cgi?id=99019
--- |
define amdgpu_ps void @kill_uncond_branch() {
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 3b5d4d56289..7864ca31112 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -158,7 +158,7 @@ entry:
; W64: s_mov_b64 exec, [[SAVEEXEC]]
; W64: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
-; W64: ; %bb.{{[0-9]+}}:
+; W64: BB{{[0-9]+_[0-9]+}}:
; W64-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4
; W64-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
@@ -204,7 +204,7 @@ entry:
; W32: s_mov_b32 exec_lo, [[SAVEEXEC]]
; W32: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
-; W32: ; %bb.{{[0-9]+}}:
+; W32: BB{{[0-9]+_[0-9]+}}:
; W32-DAG: v_mov_b32_e32 [[IDX:v[0-9]+]], s4
; W32-DAG: s_mov_b32 [[SAVEEXEC:s[0-9]+]], exec_lo
@@ -270,7 +270,7 @@ entry:
; W64-O0: buffer_store_dword [[RES]], off, s[0:3], s32 offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]]
-; W64-O0: ; %bb.{{[0-9]+}}:
+; W64-O0: BB{{[0-9]+_[0-9]+}}:
; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec
; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index d6b717411de..af34b0f3987 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -58,8 +58,9 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v5
; GFX9-NEXT: v_cmp_lt_u32_e64 s[4:5], v0, v1
; GFX9-NEXT: s_and_saveexec_b64 s[10:11], s[4:5]
+; GFX9-NEXT: ; mask branch BB1_4
; GFX9-NEXT: s_cbranch_execz BB1_4
-; GFX9-NEXT: ; %bb.1: ; %bb19
+; GFX9-NEXT: BB1_1: ; %bb19
; GFX9-NEXT: v_cvt_f32_u32_e32 v7, v6
; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v6
; GFX9-NEXT: v_add_u32_e32 v6, v4, v0
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index 84749170dc6..ffa851919f7 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -11,11 +11,12 @@
; GCN-NEXT: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
-; GCN: ; %bb.{{[0-9]+}}: ; %unreachable.bb
+; GCN: BB{{[0-9]+_[0-9]+}}: ; %unreachable.bb
; GCN-NEXT: ; divergent unreachable
-; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %Flow
+; GCN-NEXT: {{^}}[[FLOW]]: ; %Flow
; GCN-NEXT: s_or_b64 exec, exec
; GCN-NEXT: [[RET_BB]]:
@@ -54,17 +55,11 @@ ret.bb: ; preds = %else, %main_body
}
; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_vccz
+; GCN: s_cbranch_vccnz [[RET_BB:BB[0-9]+_[0-9]+]]
-; GCN: ; %bb.{{[0-9]+}}: ; %Flow
-; GCN: s_cbranch_execnz [[RETURN:BB[0-9]+_[0-9]+]]
-
-; GCN: ; %UnifiedReturnBlock
-; GCN-NEXT: s_or_b64 exec, exec
-; GCN-NEXT: s_waitcnt
-
-; GCN: BB{{[0-9]+_[0-9]+}}: ; %else
+; GCN: ; %bb.{{[0-9]+}}: ; %else
; GCN: s_and_saveexec_b64 [[SAVE_EXEC:s\[[0-9]+:[0-9]+\]]], vcc
+; GCN-NEXT: ; mask branch [[FLOW1:BB[0-9]+_[0-9]+]]
; GCN-NEXT: ; %unreachable.bb
; GCN: ds_write_b32
@@ -72,6 +67,12 @@ ret.bb: ; preds = %else, %main_body
; GCN: ; %ret.bb
; GCN: store_dword
+
+; GCN: ; %UnifiedReturnBlock
+; GCN-NEXT: s_or_b64 exec, exec
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: ; return
+; GCN-NEXT: .Lfunc_end
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <8 x i32>] addrspace(4)* inreg %arg2, i32 addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
main_body:
%i.i = extractelement <2 x i32> %arg7, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
index e854c089268..a99d18147cc 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -40,6 +40,8 @@ bb5: ; preds = %bb3, %bb1
; GCN: load_dwordx4
; GCN: v_cmp_nlt_f32
; GCN: s_and_saveexec_b64
+; GCN: ; mask branch [[UNIFIED_RET:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: [[UNIFIED_RET]]:
; GCN-NEXT: s_endpgm
; GCN: .Lfunc_end
define amdgpu_kernel void @annotate_ret_noloop(<4 x float> addrspace(1)* noalias nocapture readonly %arg) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index 64f1824b890..ce85a666340 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -3,12 +3,13 @@
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator:
; GCN: v_cmp_eq_u32
; GCN: s_and_saveexec_b64
+; GCN: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
+; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %UnifiedReturnBlock
+; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
; GCN: s_endpgm
define amdgpu_kernel void @lower_control_flow_unreachable_terminator() #0 {
@@ -28,12 +29,13 @@ ret:
; GCN-LABEL: {{^}}lower_control_flow_unreachable_terminator_swap_block_order:
; GCN: v_cmp_ne_u32
; GCN: s_and_saveexec_b64
+; GCN: ; mask branch [[RETURN:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: ; %bb.{{[0-9]+}}: ; %unreachable
+; GCN-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: [[RETURN]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @lower_control_flow_unreachable_terminator_swap_block_order() #0 {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
index b360f3aa5ff..fdb0c465c20 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow.mir
@@ -32,7 +32,7 @@ body: |
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_]], [[COPY]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
- ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; GCN: SI_MASK_BRANCH %bb.2, implicit $exec
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
index 9574edd0af9..554094cf9c0 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold-legacy=1000000 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-insert-skips -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
---
name: skip_branch_taildup_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
index 7b146e03178..bce4023316f 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
@@ -5,8 +5,9 @@
; An s_cbranch_execnz is required to avoid trapping if all lanes are 0
; GCN-LABEL: {{^}}trap_divergent_branch:
; GCN: s_and_saveexec_b64
-; GCN: s_cbranch_execnz [[TRAP:BB[0-9]+_[0-9]+]]
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: s_cbranch_execz [[ENDPGM:BB[0-9]+_[0-9]+]]
+; GCN: s_branch [[TRAP:BB[0-9]+_[0-9]+]]
+; GCN: [[ENDPGM]]:
; GCN-NEXT: s_endpgm
; GCN: [[TRAP]]:
; GCN: s_trap 2
@@ -29,7 +30,7 @@ end:
; GCN-LABEL: {{^}}debugtrap_divergent_branch:
; GCN: s_and_saveexec_b64
; GCN: s_cbranch_execz [[ENDPGM:BB[0-9]+_[0-9]+]]
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: BB{{[0-9]+}}_{{[0-9]+}}:
; GCN: s_trap 3
; GCN-NEXT: [[ENDPGM]]:
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 115782863ef..e848e75fc00 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -220,9 +220,10 @@ exit:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
-; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_execz [[EXIT]]
-; CHECK: ; %bb.{{[0-9]+}}: ; %bb.preheader
+; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
; CHECK: s_mov_b32
; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
@@ -356,18 +357,20 @@ bb7: ; preds = %bb4
; CHECK: ; %bb.0:
; CHECK: s_and_saveexec_b64
; CHECK: s_xor_b64
+; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
; CHECK: v_cmpx_gt_f32_e32 vcc, 0,
-; CHECK: BB{{[0-9]+_[0-9]+}}:
+; CHECK: [[BB4]]:
; CHECK: s_or_b64 exec, exec
; CHECK: image_sample_c
; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
-; CHECK-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
+; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
+; CHECK-NEXT: s_cbranch_execz [[END]]
; CHECK-NOT: branch
-; CHECK: ; %bb.{{[0-9]+}}: ; %bb8
+; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
; CHECK: buffer_store_dword
; CHECK: [[END]]:
diff --git a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
index c376886a3e8..4ba16b4eb30 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
-; GCN-LABEL: ; %bb.0:
+; GCN-LABEL: BB0_1
; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[ADDR_LO:[0-9]+]]{{\:}}[[ADDR_HI:[0-9]+]]{{\]}}, 0x0
; GCN: s_waitcnt lgkmcnt(0)
; GCN: global_store_dword v{{\[}}[[ADDR_LO]]{{\:}}[[ADDR_HI]]{{\]}}, v{{[0-9]+}}, off
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index be60a34b420..00ae166a6ce 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -28,8 +28,9 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
+; GCN-NEXT: ; mask branch BB0_2
; GCN-NEXT: s_cbranch_execz BB0_2
-; GCN-NEXT: ; %bb.1: ; %if.then4.i
+; GCN-NEXT: BB0_1: ; %if.then4.i
; GCN-NEXT: buffer_load_dword v0, v32, s[36:39], s32 offen
; GCN-NEXT: buffer_load_dword v1, v32, s[36:39], s32 offen offset:4
; GCN-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 70b7d06e442..3127201e922 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -18,13 +18,14 @@ define amdgpu_kernel void @foobar(float %a0, float %a1, float addrspace(1)* %out
; CHECK-NEXT: v_mov_b32_e32 v2, s6
; CHECK-NEXT: v_mov_b32_e32 v3, s7
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
-; CHECK-NEXT: ; %bb.1: ; %ift
+; CHECK-NEXT: ; mask branch BB0_2
+; CHECK-NEXT: BB0_1: ; %ift
; CHECK-NEXT: s_mov_b32 s4, s5
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: v_mov_b32_e32 v2, s6
; CHECK-NEXT: v_mov_b32_e32 v3, s7
-; CHECK-NEXT: ; %bb.2: ; %ife
+; CHECK-NEXT: BB0_2: ; %ife
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_mov_b32 s3, 0xf000
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index 2c64b1bdb3d..96552634065 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -335,7 +335,7 @@ endif:
; GCN: [[IF_LABEL]]:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
-; GCN: s_cbranch_execz [[ENDIF_LABEL]]
+; GCN: ; mask branch [[ENDIF_LABEL]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index a23eb2b137d..a1cf6cf6300 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -5,6 +5,7 @@
; CHECK-LABEL: {{^}}test1:
; CHECK: v_cmp_ne_u32_e32 vcc, 0
; CHECK: s_and_saveexec_b64
+; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}}
; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: ; %loop_body
@@ -32,6 +33,7 @@ out:
; CHECK-LABEL: {{^}}test2:
; CHECK: s_and_saveexec_b64
+; CHECK-NEXT: ; mask branch
; CHECK-NEXT: s_cbranch_execz
define amdgpu_kernel void @test2(i32 addrspace(1)* %out, i32 %a, i32 %b) {
main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 481929a0438..ea74268dbe7 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -13,16 +13,19 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; SI-NEXT: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0
; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]
+; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]]
+; SI-NEXT: s_cbranch_execz [[FLOW_BB]]
-; SI-NEXT: ; %bb.{{[0-9]+}}: ; %LeafBlock3
+; SI-NEXT: BB{{[0-9]+}}_1: ; %LeafBlock3
; SI: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
; SI: s_and_saveexec_b64
-; SI-NEXT: s_cbranch_execnz
+; SI-NEXT: ; mask branch
; v_mov should be after exec modification
-; SI: ; %bb.{{[0-9]+}}:
+; SI: [[FLOW_BB]]:
; SI-NEXT: s_or_saveexec_b64 [[SAVE3:s\[[0-9]+:[0-9]+\]]], [[SAVE2]]
; SI-NEXT: s_xor_b64 exec, exec, [[SAVE3]]
+; SI-NEXT: ; mask branch
;
define amdgpu_kernel void @test_if(i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
entry:
@@ -62,9 +65,10 @@ end:
; SI-LABEL: {{^}}simple_test_v_if:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
-; SI-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: s_cbranch_execz [[EXIT]]
-; SI-NEXT: ; %bb.{{[0-9]+}}:
+; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
; SI-NEXT: {{^}}[[EXIT]]:
@@ -88,9 +92,10 @@ exit:
; SI-LABEL: {{^}}simple_test_v_if_ret_else_ret:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
-; SI-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
+; SI-NEXT: s_cbranch_execz [[EXIT]]
-; SI-NEXT: ; %bb.{{[0-9]+}}:
+; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
; SI-NEXT: {{^}}[[EXIT]]:
@@ -117,22 +122,23 @@ exit:
; SI: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
-; SI: s_cbranch_execnz [[EXIT:BB[0-9]+_[0-9]+]]
+; SI: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
-; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %Flow
+; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %exit
+; SI: ds_write_b32
+
+; SI-NEXT: {{^}}[[FLOW]]:
; SI-NEXT: s_or_saveexec_b64
; SI-NEXT: s_xor_b64 exec, exec
-; SI-NEXT: s_cbranch_execz [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]]
+; SI-NEXT: ; mask branch [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]]
+; SI-NEXT: s_cbranch_execz [[UNIFIED_RETURN]]
-; SI-NEXT: ; %bb.{{[0-9]+}}: ; %then
+; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %then
; SI: s_waitcnt
; SI-NEXT: buffer_store_dword
; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
; SI: s_endpgm
-
-; SI-NEXT: {{^}}[[EXIT]]:
-; SI: ds_write_b32
define amdgpu_kernel void @simple_test_v_if_ret_else_code_ret(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%is.0 = icmp ne i32 %tid, 0
@@ -151,6 +157,7 @@ exit:
; SI-LABEL: {{^}}simple_test_v_loop:
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: ; mask branch
; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
@@ -192,10 +199,11 @@ exit:
; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
; SI: v_cmp_lt_i32_e32 vcc
; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
+; SI-NEXT: ; mask branch
; SI-NEXT: s_cbranch_execz [[LABEL_EXIT:BB[0-9]+_[0-9]+]]
; Initialize inner condition to false
-; SI: ; %bb.{{[0-9]+}}: ; %bb10.preheader
+; SI: BB{{[0-9]+_[0-9]+}}: ; %bb10.preheader
; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; Clear exec bits for workitems that load -1s
@@ -206,9 +214,9 @@ exit:
; SI-DAG: v_cmp_ne_u32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
-; SI: s_cbranch_execz [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
+; SI: ; mask branch [[LABEL_FLOW:BB[0-9]+_[0-9]+]]
-; SI: ; %bb.{{[0-9]+}}: ; %bb20
+; SI: BB{{[0-9]+_[0-9]+}}: ; %bb20
; SI: buffer_store_dword
; SI: [[LABEL_FLOW]]:
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index b05a8acf901..ce73f9c89db 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -151,7 +151,7 @@ define amdgpu_kernel void @test_vop3_cmp_u32_sop_or(i32 addrspace(1)* %arg) {
; GCN-LABEL: {{^}}test_mask_if:
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
-; GCN: s_cbranch_execz
+; GCN: ; mask branch
define amdgpu_kernel void @test_mask_if(i32 addrspace(1)* %arg) #0 {
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%cmp = icmp ugt i32 %lid, 10
@@ -175,18 +175,19 @@ endif:
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
; GCN: s_cbranch_execz
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: BB{{.*}}:
; GCN: BB{{.*}}:
; GFX1032: s_xor_b32 s{{[0-9]+}}, exec_lo, s{{[0-9]+}}
; GFX1064: s_xor_b64 s[{{[0-9:]+}}], exec, s[{{[0-9:]+}}]
-; GCN: ; %bb.{{[0-9]+}}:
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: ; mask branch BB
+; GCN: BB{{.*}}:
+; GCN: BB{{.*}}:
; GFX1032: s_or_b32 exec_lo, exec_lo, s{{[0-9]+}}
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}}
; GFX1064: s_or_b64 exec, exec, s[{{[0-9:]+}}]
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
-; GCN: s_cbranch_execz BB
-; GCN: ; %bb.{{[0-9]+}}:
+; GCN: ; mask branch BB
+; GCN: BB{{.*}}:
; GCN: BB{{.*}}:
; GCN: s_endpgm
define amdgpu_kernel void @test_loop_with_if(i32 addrspace(1)* %arg) #0 {
@@ -227,8 +228,9 @@ bb13:
; GCN-LABEL: {{^}}test_loop_with_if_else_break:
; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
+; GCN: ; mask branch
; GCN: s_cbranch_execz
-; GCN: ; %bb.{{[0-9]+}}: ; %.preheader
+; GCN: BB{{.*}}:
; GCN: BB{{.*}}:
; GFX1032: s_or_b32 [[MASK0:s[0-9]+]], [[MASK0]], vcc_lo
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index f3a44a78e89..b799c2b5993 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -425,8 +425,9 @@ END:
;CHECK-NEXT: s_and_b64 exec, exec, [[ORIG]]
;CHECK-NEXT: s_and_b64 [[SAVED]], exec, [[SAVED]]
;CHECK-NEXT: s_xor_b64 exec, exec, [[SAVED]]
-;CHECK-NEXT: s_cbranch_execz [[END_BB:BB[0-9]+_[0-9]+]]
-;CHECK-NEXT: ; %bb.{{[0-9]+}}: ; %ELSE
+;CHECK-NEXT: mask branch [[END_BB:BB[0-9]+_[0-9]+]]
+;CHECK-NEXT: s_cbranch_execz [[END_BB]]
+;CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %ELSE
;CHECK: store_dword
;CHECK: [[END_BB]]: ; %END
;CHECK: s_or_b64 exec, exec,