diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-02-23 10:45:46 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-02-23 10:45:46 +0000 |
commit | 43c1115cd461c1ed6bf5e599eedb17ed64b87c47 (patch) | |
tree | 4609cb8c43e53b21222ce3ffb466e254167bce36 /llvm/test/Transforms/StructurizeCFG | |
parent | 983d6c3f18908532b28887ca96ac0da6ad921e7f (diff) | |
download | bcm5719-llvm-43c1115cd461c1ed6bf5e599eedb17ed64b87c47.tar.gz bcm5719-llvm-43c1115cd461c1ed6bf5e599eedb17ed64b87c47.zip |
StructurizeCFG: Test for branch divergence correctly
Summary:
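This fixes cases like the new test @nonuniform. In that test, %cc itself
is a uniform value within any single loop iteration; however, threads can
leave the loop in different iterations, so when %cc is read after the end
of the loop (by the branch in %for.end that guards basic block %if), its
value is effectively non-uniform.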
This problem was encountered in
https://bugs.freedesktop.org/show_bug.cgi?id=103743; however, this change
in itself is not sufficient to fix that bug, as there is another issue
in the AMDGPU backend.
Change-Id: I32bbffece4a32f686fab54964dae1a5dd72949d4
Reviewers: arsenm, rampitec, jlebar
Subscribers: wdng, tpr, llvm-commits
Differential Revision: https://reviews.llvm.org/D40546
llvm-svn: 325881
Diffstat (limited to 'llvm/test/Transforms/StructurizeCFG')
-rw-r--r-- | llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll | 82 |
1 file changed, 82 insertions, 0 deletions
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
new file mode 100644
index 00000000000..0e613721564
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions < %s | FileCheck %s
+
+define amdgpu_cs void @uniform(i32 inreg %v) {
+; CHECK-LABEL: @uniform(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CC:%.*]] = icmp eq i32 [[V:%.*]], 0
+; CHECK-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]], !structurizecfg.uniform !0
+; CHECK: if:
+; CHECK-NEXT: br label [[END]], !structurizecfg.uniform !0
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+  %cc = icmp eq i32 %v, 0
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+end:
+  ret void
+}
+
+define amdgpu_cs void @nonuniform(i32 addrspace(2)* %ptr) {
+; CHECK-LABEL: @nonuniform(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW:%.*]] ]
+; CHECK-NEXT: [[CC:%.*]] = icmp ult i32 [[I]], 4
+; CHECK-NEXT: br i1 [[CC]], label [[MID_LOOP:%.*]], label [[FLOW]]
+; CHECK: mid.loop:
+; CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[CC2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[CC2]], label [[END_LOOP:%.*]], label [[FLOW1:%.*]]
+; CHECK: Flow:
+; CHECK-NEXT: [[TMP0]] = phi i32 [ [[TMP2:%.*]], [[FLOW1]] ], [ undef, [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP3:%.*]], [[FLOW1]] ], [ true, [[FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: end.loop:
+; CHECK-NEXT: [[I_INC:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT: br label [[FLOW1]]
+; CHECK: Flow1:
+; CHECK-NEXT: [[TMP2]] = phi i32 [ [[I_INC]], [[END_LOOP]] ], [ undef, [[MID_LOOP]] ]
+; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[END_LOOP]] ], [ true, [[MID_LOOP]] ]
+; CHECK-NEXT: br label [[FLOW]]
+; CHECK: for.end:
+; CHECK-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
+; CHECK: if:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %cc = icmp ult i32 %i, 4
+  br i1 %cc, label %mid.loop, label %for.end
+
+mid.loop:
+  %v = call i32 @llvm.amdgcn.workitem.id.x()
+  %cc2 = icmp eq i32 %v, 0
+  br i1 %cc2, label %end.loop, label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  br label %for.body
+
+for.end:
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+end:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()