StructurizeCFG: Test for branch divergence correctly

Fixes cases like the new test @nonuniform. In that test, %cc itself is a uniform value; however, when reading it after the end of the loop in basic block %if, its value is effectively non-uniform, so the branch is non-uniform.

This problem was encountered in https://bugs.freedesktop.org/show_bug.cgi?id=103743; however, this change in itself is not sufficient to fix that bug, as there is another issue in the AMDGPU backend.

As discovered after committing an earlier version of this change, this exposes a subtle interaction between this pass and DivergenceAnalysis: since we remove and re-create branch instructions, we can no longer rely on DivergenceAnalysis for branches in subregions that were already processed by the pass.

Explicitly remove branch instructions from DivergenceAnalysis to avoid dangling pointers as a matter of defensive programming, and change how we detect non-uniform subregions.

Change-Id: I32bbffece4a32f686fab54964dae1a5dd72949d4
Differential Revision: https://reviews.llvm.org/D43743
llvm-svn: 329165
author: Nicolai Haehnle <nhaehnle@gmail.com> 2018-04-04 10:58:15 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> 2018-04-04 10:58:15 +0000
commit: eb7311ffb126282c9b131b2fa3d541eb0f0321ca (patch)
tree: 4b17467762861bff3d94d545200b7d86fe1d788e /llvm/test/Transforms/StructurizeCFG/AMDGPU
parent: 3ffd383a15349392247302866777425096aedcf2 (diff)
download: bcm5719-llvm-eb7311ffb126282c9b131b2fa3d541eb0f0321ca.tar.gz
bcm5719-llvm-eb7311ffb126282c9b131b2fa3d541eb0f0321ca.zip
1 files changed, 82 insertions, 0 deletions
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
new file mode 100644
index 00000000000..0e613721564
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions < %s | FileCheck %s
+; @uniform: %cc is derived only from the inreg argument %v (expected to be uniform); the expected output keeps the entry/if/end CFG as written and tags both branches with !structurizecfg.uniform.
+define amdgpu_cs void @uniform(i32 inreg %v) {
+; CHECK-LABEL: @uniform(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CC:%.*]] = icmp eq i32 [[V:%.*]], 0
+; CHECK-NEXT:    br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]], !structurizecfg.uniform !0
+; CHECK:       if:
+; CHECK-NEXT:    br label [[END]], !structurizecfg.uniform !0
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cc = icmp eq i32 %v, 0
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+end:
+  ret void
+}
+; @nonuniform: %cc is computed only from the loop counter %i, but the loop can also be left through the branch on %cc2 (which depends on the work-item id), so %cc is effectively non-uniform when read in %for.end; the expected output is structurized (Flow/Flow1 blocks) instead of being skipped as a uniform region.
+define amdgpu_cs void @nonuniform(i32 addrspace(2)* %ptr) {
+; CHECK-LABEL: @nonuniform(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW:%.*]] ]
+; CHECK-NEXT:    [[CC:%.*]] = icmp ult i32 [[I]], 4
+; CHECK-NEXT:    br i1 [[CC]], label [[MID_LOOP:%.*]], label [[FLOW]]
+; CHECK:       mid.loop:
+; CHECK-NEXT:    [[V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[CC2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[CC2]], label [[END_LOOP:%.*]], label [[FLOW1:%.*]]
+; CHECK:       Flow:
+; CHECK-NEXT:    [[TMP0]] = phi i32 [ [[TMP2:%.*]], [[FLOW1]] ], [ undef, [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP3:%.*]], [[FLOW1]] ], [ true, [[FOR_BODY]] ]
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       end.loop:
+; CHECK-NEXT:    [[I_INC:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT:    br label [[FLOW1]]
+; CHECK:       Flow1:
+; CHECK-NEXT:    [[TMP2]] = phi i32 [ [[I_INC]], [[END_LOOP]] ], [ undef, [[MID_LOOP]] ]
+; CHECK-NEXT:    [[TMP3]] = phi i1 [ false, [[END_LOOP]] ], [ true, [[MID_LOOP]] ]
+; CHECK-NEXT:    br label [[FLOW]]
+; CHECK:       for.end:
+; CHECK-NEXT:    br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %cc = icmp ult i32 %i, 4
+  br i1 %cc, label %mid.loop, label %for.end
+
+mid.loop:
+  %v = call i32 @llvm.amdgcn.workitem.id.x()
+  %cc2 = icmp eq i32 %v, 0
+  br i1 %cc2, label %end.loop, label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  br label %for.body
+
+for.end:
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+end:
+  ret void
+}
+; AMDGPU intrinsic called in @nonuniform to obtain %v (the work-item id in x), the value compared by the %cc2 branch.
+declare i32 @llvm.amdgcn.workitem.id.x()
author	Nicolai Haehnle <nhaehnle@gmail.com>	2018-04-04 10:58:15 +0000
committer	Nicolai Haehnle <nhaehnle@gmail.com>	2018-04-04 10:58:15 +0000
commit	eb7311ffb126282c9b131b2fa3d541eb0f0321ca (patch)
tree	4b17467762861bff3d94d545200b7d86fe1d788e /llvm/test/Transforms/StructurizeCFG/AMDGPU
parent	3ffd383a15349392247302866777425096aedcf2 (diff)
download	bcm5719-llvm-eb7311ffb126282c9b131b2fa3d541eb0f0321ca.tar.gz bcm5719-llvm-eb7311ffb126282c9b131b2fa3d541eb0f0321ca.zip