summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-12-06 01:02:51 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-12-06 01:02:51 +0000
commitad55ee58691b58f3ac36e688fc90808d5cf71e7f (patch)
tree712b24af2cc28eb96613647ee32d130ba02f8d03
parent9642b36e91c28b888db018ebec8998b5bee33338 (diff)
downloadbcm5719-llvm-ad55ee58691b58f3ac36e688fc90808d5cf71e7f.tar.gz
bcm5719-llvm-ad55ee58691b58f3ac36e688fc90808d5cf71e7f.zip
AMDGPU: Don't required structured CFG
The structured CFG is just an aid to inserting exec mask modification instructions, once that is done we don't really need it anymore. We also do not analyze blocks with terminators that modify exec, so this should only be impacting true branches. llvm-svn: 288744
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp5
-rw-r--r--llvm/test/CodeGen/AMDGPU/basic-branch.ll11
-rw-r--r--llvm/test/CodeGen/AMDGPU/br_cc.f16.ll25
-rw-r--r--llvm/test/CodeGen/AMDGPU/branch-relaxation.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll5
-rw-r--r--llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll47
-rw-r--r--llvm/test/CodeGen/AMDGPU/sgpr-copy.ll31
-rw-r--r--llvm/test/CodeGen/AMDGPU/skip-if-dead.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-cfg.ll10
10 files changed, 107 insertions, 47 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7287b56aa6d..e1fd95d0917 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -162,7 +162,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
TLOF(createTLOF(getTargetTriple())),
IntrinsicInfo() {
- setRequiresStructuredCFG(true);
initAsmInfo();
}
@@ -191,7 +190,9 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+ setRequiresStructuredCFG(true);
+}
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
const Function &F) const {
diff --git a/llvm/test/CodeGen/AMDGPU/basic-branch.ll b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
index 83313ed5327..24874ee7fa9 100644
--- a/llvm/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/llvm/test/CodeGen/AMDGPU/basic-branch.ll
@@ -8,13 +8,15 @@
; GCNNOOPT: v_writelane_b32
; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
+
; GCN: ; BB#1
; GCNNOOPT: v_readlane_b32
; GCNNOOPT: v_readlane_b32
; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
; GCN: s_endpgm
define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
%cmp = icmp ne i32 %val, 0
@@ -35,9 +37,10 @@ end:
; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
; GCN: s_endpgm
define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
%cmp0 = icmp ne i1 %val, 0
diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
index 6cf3fdad3e3..970260412c4 100644
--- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -12,9 +12,10 @@
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
-; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
+; SI: s_branch
+; VI: buffer_store_short
+; VI: s_endpgm
; GCN: two{{$}}
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
@@ -47,17 +48,19 @@ two:
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
-; GCN: s_cbranch_vccnz
+; SI: s_cbranch_vccz
+; VI: s_cbranch_vccnz
-; GCN: one{{$}}
-; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; VI: one{{$}}
+; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
; GCN: two{{$}}
; SI: v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
-; GCN: buffer_store_short v[[B_F16]]
-; GCN: s_endpgm
+
+; SI: one{{$}}
+; SI: buffer_store_short v[[A_F16]]
+; SI: s_endpgm
+
define void @br_cc_f16_imm_a(
half addrspace(1)* %r,
half addrspace(1)* %b) {
@@ -87,8 +90,6 @@ two:
; GCN: one{{$}}
; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
; GCN: two{{$}}
; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 92debd8b927..39505404a86 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -475,14 +475,13 @@ ret:
; GCN-LABEL: {{^}}long_branch_hang:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
-; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: s_branch [[SHORTB:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
-; GCN-NEXT: [[LONG_BR_0]]:
; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
; GCN: s_setpc_b64
-; GCN: [[SHORTB]]:
+; GCN-NEXT: [[LONG_BR_0]]:
; GCN-DAG: v_cmp_lt_i32
; GCN-DAG: v_cmp_gt_i32
; GCN: s_cbranch_vccnz
@@ -492,7 +491,6 @@ ret:
; GCN: [[LONG_BR_DEST0]]
; GCN: v_cmp_ne_u32_e32
-; GCN-NEXT: ; implicit-def
; GCN-NEXT: s_cbranch_vccz
; GCN: s_setpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 3a933306c64..528e12b76ce 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -506,11 +506,13 @@ bb:
bb1:
%tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp3 = extractelement <4 x float> %tmp2, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
br label %bb7
bb4:
%tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp6 = extractelement <4 x float> %tmp5, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
br label %bb7
bb7:
@@ -554,11 +556,13 @@ bb:
bb1: ; preds = %bb
%tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
br label %bb7
bb4: ; preds = %bb
%tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
%tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
+ call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
br label %bb7
bb7: ; preds = %bb4, %bb1
@@ -745,6 +749,8 @@ bb8: ; preds = %bb2
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare void @llvm.amdgcn.s.barrier() #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll b/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
index 8dbec18dbf2..078d6330ce0 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
@@ -1,8 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; FIXME: Enabling critical edge splitting will fix this.
-; XFAIL: *
-
; Make sure that m0 is not reinitialized in the loop.
; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
@@ -12,7 +9,9 @@
; GCN: s_mov_b32 m0, -1
; GCN: BB0_2:
+; GCN-NOT: m0
; GCN: ds_read_b32
+; GCN-NOT: m0
; GCN: buffer_store_dword
; GCN: s_cbranch_scc0 BB0_2
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index bb3f9491435..d5d2f6b717f 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
;
;
; Most SALU instructions ignore control flow, so we need to make sure
@@ -9,7 +9,9 @@
; about instructions in different blocks overwriting each other.
; SI-LABEL: {{^}}sgpr_if_else_salu_br:
; SI: s_add
-; SI: s_add
+; SI: s_branch
+
+; SI: s_sub
define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
entry:
@@ -17,6 +19,45 @@ entry:
br i1 %0, label %if, label %else
if:
+ %1 = sub i32 %b, %c
+ br label %endif
+
+else:
+ %2 = add i32 %d, %e
+ br label %endif
+
+endif:
+ %3 = phi i32 [%1, %if], [%2, %else]
+ %4 = add i32 %3, %a
+ store i32 %4, i32 addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}sgpr_if_else_salu_br_opt:
+; SI: s_cmp_lg_u32
+; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]]
+
+; SI: ; BB#1: ; %else
+; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf
+; SI-NOT: add
+; SI: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
+
+; SI: [[IF]]: ; %if
+; SI: s_load_dword [[LOAD0]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_load_dword [[LOAD1]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NOT: add
+
+; SI: [[ENDIF]]: ; %endif
+; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
+; SI: buffer_store_dword
+; SI-NEXT: s_endpgm
+define void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+ %0 = icmp eq i32 %a, 0
+ br i1 %0, label %if, label %else
+
+if:
%1 = add i32 %b, %c
br label %endif
@@ -67,7 +108,7 @@ endif:
; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
-; SI: BB2_2:
+; SI: BB{{[0-9]+}}_2:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index da270c533ec..e65f1e2da57 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; This test checks that no VGPR to SGPR copies are created by the register
; allocator.
@@ -223,8 +223,15 @@ declare i32 @llvm.SI.packf16(float, float) #1
; an assertion failure.
; CHECK-LABEL: {{^}}sample_v3:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
+; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
+; CHECK: s_branch
+
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
+
+; CHECK: BB{{[0-9]+_[0-9]+}}:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
; CHECK: exp
; CHECK: s_endpgm
define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
@@ -241,14 +248,14 @@ entry:
br i1 %tmp27, label %if, label %else
if: ; preds = %entry
- %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%val.if.0 = extractelement <4 x float> %val.if, i32 0
%val.if.1 = extractelement <4 x float> %val.if, i32 1
%val.if.2 = extractelement <4 x float> %val.if, i32 2
br label %endif
else: ; preds = %entry
- %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%val.else.0 = extractelement <4 x float> %val.else, i32 0
%val.else.1 = extractelement <4 x float> %val.else, i32 1
%val.else.2 = extractelement <4 x float> %val.else, i32 2
@@ -317,9 +324,15 @@ ENDIF69: ; preds = %LOOP68
; This test checks that image_sample resource descriptors aren't loaded into
; vgprs. The verifier will fail if this happens.
-; CHECK-LABEL:{{^}}sample_rsrc:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK-LABEL:{{^}}sample_rsrc
+
+; CHECK: s_cmp_eq_u32
+; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
+
+; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
+
+; [[END]]:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
; CHECK: s_endpgm
define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index d458faa818b..33f5e98fcc7 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -308,10 +308,8 @@ end:
; CHECK: s_mov_b64 exec, 0
; CHECK: [[SKIPKILL]]:
-; CHECK: v_cmp_nge_f32
-; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
-
-; CHECK: [[UNREACHABLE]]:
+; CHECK: v_cmp_nge_f32_e32 vcc
+; CHECK-NEXT: BB#3: ; %bb5
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index a5d1cd2281c..a0060bd368b 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -197,15 +197,15 @@ if.end: ; preds = %if.else, %if.then
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
-; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; GCN: buffer_store_dword [[TWO]]
+; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: [[IF_LABEL]]:
-; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; GCN: buffer_store_dword [[ONE]]
+; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1
+
+; GCN-NEXT: [[ENDIF_LABEL]]:
+; GCN: buffer_store_dword [[IMM_REG]]
-; GCN: [[ENDIF_LABEL]]:
; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN: buffer_store_dword [[THREE]]
; GCN: s_endpgm
OpenPOWER on IntegriCloud