summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/StructurizeCFG
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2018-02-23 10:45:46 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2018-02-23 10:45:46 +0000
commit43c1115cd461c1ed6bf5e599eedb17ed64b87c47 (patch)
tree4609cb8c43e53b21222ce3ffb466e254167bce36 /llvm/test/Transforms/StructurizeCFG
parent983d6c3f18908532b28887ca96ac0da6ad921e7f (diff)
downloadbcm5719-llvm-43c1115cd461c1ed6bf5e599eedb17ed64b87c47.tar.gz
bcm5719-llvm-43c1115cd461c1ed6bf5e599eedb17ed64b87c47.zip
StructurizeCFG: Test for branch divergence correctly
Summary:
This fixes cases like the new test @nonuniform. In that test, %cc itself is a uniform value; however, when reading it after the end of the loop in basic block %if, its value is effectively non-uniform.

This problem was encountered in https://bugs.freedesktop.org/show_bug.cgi?id=103743; however, this change in itself is not sufficient to fix that bug, as there is another issue in the AMDGPU backend.

Change-Id: I32bbffece4a32f686fab54964dae1a5dd72949d4
Reviewers: arsenm, rampitec, jlebar
Subscribers: wdng, tpr, llvm-commits
Differential Revision: https://reviews.llvm.org/D40546
llvm-svn: 325881
Diffstat (limited to 'llvm/test/Transforms/StructurizeCFG')
-rw-r--r--llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll82
1 files changed, 82 insertions, 0 deletions
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
new file mode 100644
index 00000000000..0e613721564
--- /dev/null
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions < %s | FileCheck %s
+
+define amdgpu_cs void @uniform(i32 inreg %v) {
+; CHECK-LABEL: @uniform(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CC:%.*]] = icmp eq i32 [[V:%.*]], 0
+; CHECK-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]], !structurizecfg.uniform !0
+; CHECK: if:
+; CHECK-NEXT: br label [[END]], !structurizecfg.uniform !0
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry: ; simple if/end diamond whose only condition comes from the inreg argument %v
+ %cc = icmp eq i32 %v, 0 ; condition depends solely on %v
+ br i1 %cc, label %if, label %end ; expected output keeps this branch and tags it with !structurizecfg.uniform
+
+if: ; empty "then" block; its branch is also expected to carry !structurizecfg.uniform
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_cs void @nonuniform(i32 addrspace(2)* %ptr) {
+; CHECK-LABEL: @nonuniform(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FLOW:%.*]] ]
+; CHECK-NEXT: [[CC:%.*]] = icmp ult i32 [[I]], 4
+; CHECK-NEXT: br i1 [[CC]], label [[MID_LOOP:%.*]], label [[FLOW]]
+; CHECK: mid.loop:
+; CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[CC2:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[CC2]], label [[END_LOOP:%.*]], label [[FLOW1:%.*]]
+; CHECK: Flow:
+; CHECK-NEXT: [[TMP0]] = phi i32 [ [[TMP2:%.*]], [[FLOW1]] ], [ undef, [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP3:%.*]], [[FLOW1]] ], [ true, [[FOR_BODY]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: end.loop:
+; CHECK-NEXT: [[I_INC:%.*]] = add i32 [[I]], 1
+; CHECK-NEXT: br label [[FLOW1]]
+; CHECK: Flow1:
+; CHECK-NEXT: [[TMP2]] = phi i32 [ [[I_INC]], [[END_LOOP]] ], [ undef, [[MID_LOOP]] ]
+; CHECK-NEXT: [[TMP3]] = phi i1 [ false, [[END_LOOP]] ], [ true, [[MID_LOOP]] ]
+; CHECK-NEXT: br label [[FLOW]]
+; CHECK: for.end:
+; CHECK-NEXT: br i1 [[CC]], label [[IF:%.*]], label [[END:%.*]]
+; CHECK: if:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: ret void
+;
+entry: ; loop with two exits to %for.end, followed by a branch that re-reads the loop condition %cc
+ br label %for.body
+
+for.body: ; loop header
+ %i = phi i32 [0, %entry], [%i.inc, %end.loop] ; induction variable, starts at 0
+ %cc = icmp ult i32 %i, 4 ; loop condition, computed from %i only
+ br i1 %cc, label %mid.loop, label %for.end ; first loop exit: taken when %i >= 4
+
+mid.loop:
+ %v = call i32 @llvm.amdgcn.workitem.id.x() ; NOTE(review): presumably differs per work item -- confirm against intrinsic docs
+ %cc2 = icmp eq i32 %v, 0 ; condition derived from the workitem id
+ br i1 %cc2, label %end.loop, label %for.end ; second loop exit: taken when the workitem id is non-zero
+
+end.loop: ; loop latch
+ %i.inc = add i32 %i, 1
+ br label %for.body
+
+for.end: ; reached from either loop exit
+ br i1 %cc, label %if, label %end ; %cc is read outside the loop here; expected output has no !structurizecfg.uniform on this branch
+
+if:
+ br label %end
+
+end:
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() ; intrinsic called in @nonuniform's %mid.loop block
OpenPOWER on IntegriCloud