diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-10-31 13:26:48 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2018-10-31 13:26:48 +0000 |
commit | 28212cc6891559855d41066d68e64a84097bb749 (patch) | |
tree | 58796a00d00b99291d0b62e166d4dce0242cb6a6 /llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll | |
parent | 2efccd2cf2804e2143c67c01bdfa44c5e3d887ac (diff) | |
download | bcm5719-llvm-28212cc6891559855d41066d68e64a84097bb749.tar.gz bcm5719-llvm-28212cc6891559855d41066d68e64a84097bb749.zip |
AMDGPU: Remove PHI loop condition optimization
Summary:
The optimization to break out of loops early if all threads are dead was
never fully implemented.
However, the PHI node analysis is actually causing a number of problems, so
remove all the extra code for it.
(This does actually regress code quality in a few places because it
ends up relying more heavily on phis of i1, which we don't do a
great job with. However, since it fixes real bugs in the wild, we
should take this change. I have some prototype changes to improve
i1 lowering in general -- not just for control flow -- which should
help recover the code quality, I just need to make those changes
fit for general consumption. -- Nicolai)
Change-Id: I6fc6c6c8961857ac6009fcfb9f7e5e48dc23fbb1
Patch-by: Christian König <christian.koenig@amd.com>
Reviewers: arsenm, rampitec, tpr
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D53359
llvm-svn: 345718
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll | 44 |
1 files changed, 20 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll index 8489a785310..a007c965f94 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll @@ -10,7 +10,7 @@ ; IR-LABEL: @reduced_nested_loop_conditions( ; IR: bb5: -; IR-NEXT: %phi.broken = phi i64 [ %loop.phi, %bb10 ], [ 0, %bb ] +; IR-NEXT: %phi.broken = phi i64 [ %3, %bb10 ], [ 0, %bb ] ; IR-NEXT: %tmp6 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ] ; IR-NEXT: %tmp7 = icmp eq i32 %tmp6, 1 ; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp7) @@ -19,25 +19,23 @@ ; IR-NEXT: br i1 %1, label %bb8, label %Flow ; IR: bb8: -; IR-NEXT: %3 = call i64 @llvm.amdgcn.break(i64 %phi.broken) ; IR-NEXT: br label %bb13 ; IR: bb10: -; IR-NEXT: %loop.phi = phi i64 [ %6, %Flow ] -; IR-NEXT: %tmp11 = phi i32 [ %5, %Flow ] -; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop(i64 %loop.phi) +; IR-NEXT: %tmp11 = phi i32 [ %6, %Flow ] +; IR-NEXT: %tmp12 = phi i1 [ %5, %Flow ] +; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken) +; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop(i64 %3) ; IR-NEXT: br i1 %4, label %bb23, label %bb5 ; IR: Flow: -; IR-NEXT: %loop.phi1 = phi i64 [ %loop.phi2, %bb4 ], [ %phi.broken, %bb5 ] -; IR-NEXT: %5 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ] -; IR-NEXT: %6 = call i64 @llvm.amdgcn.else.break(i64 %2, i64 %loop.phi1) +; IR-NEXT: %5 = phi i1 [ %tmp22, %bb4 ], [ true, %bb5 ] +; IR-NEXT: %6 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ] ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %2) ; IR-NEXT: br label %bb10 ; IR: bb13: -; IR-NEXT: %loop.phi3 = phi i64 [ %loop.phi4, %bb3 ], [ %3, %bb8 ] -; IR-NEXT: %tmp14 = phi i1 [ false, %bb3 ], [ true, %bb8 ] +; IR-NEXT: %tmp14 = phi i1 [ %tmp22, %bb3 ], [ true, %bb8 ] ; IR-NEXT: %tmp15 = bitcast i64 %tmp2 to <2 x i32> ; IR-NEXT: br i1 %tmp14, label %bb16, label %bb20 @@ -48,13 +46,12 @@ ; IR-NEXT: br label %bb20 ; IR: bb20: -; IR-NEXT: 
%loop.phi4 = phi i64 [ %phi.broken, %bb16 ], [ %phi.broken, %bb13 ] -; IR-NEXT: %loop.phi2 = phi i64 [ %phi.broken, %bb16 ], [ %loop.phi3, %bb13 ] ; IR-NEXT: %tmp21 = phi i32 [ %tmp19, %bb16 ], [ 0, %bb13 ] +; IR-NEXT: %tmp22 = phi i1 [ false, %bb16 ], [ %tmp14, %bb13 ] ; IR-NEXT: br label %bb9 ; IR: bb23: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %3) ; IR-NEXT: ret void ; GCN-LABEL: {{^}}reduced_nested_loop_conditions: @@ -125,7 +122,7 @@ bb23: ; preds = %bb10 ; IR: Flow3: ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %21) -; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %13) +; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %14) ; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0 ; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1 ; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4 @@ -147,25 +144,24 @@ bb23: ; preds = %bb10 ; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp15) ; IR: Flow1: -; IR-NEXT: %loop.phi = phi i64 [ %18, %bb21 ], [ %phi.broken, %bb14 ] ; IR-NEXT: %11 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %bb14 ] ; IR-NEXT: %12 = phi i32 [ %tmp10, %bb21 ], [ undef, %bb14 ] -; IR-NEXT: %13 = phi i1 [ %17, %bb21 ], [ false, %bb14 ] -; IR-NEXT: %14 = phi i1 [ false, %bb21 ], [ true, %bb14 ] -; IR-NEXT: %15 = call i64 @llvm.amdgcn.else.break(i64 %10, i64 %loop.phi) +; IR-NEXT: %13 = phi i1 [ %18, %bb21 ], [ true, %bb14 ] +; IR-NEXT: %14 = phi i1 [ %18, %bb21 ], [ false, %bb14 ] +; IR-NEXT: %15 = phi i1 [ false, %bb21 ], [ true, %bb14 ] ; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %10) -; IR-NEXT: %16 = call i1 @llvm.amdgcn.loop(i64 %15) -; IR-NEXT: br i1 %16, label %Flow2, label %bb14 +; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break(i1 %13, i64 %phi.broken) +; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop(i64 %16) +; IR-NEXT: br i1 %17, label %Flow2, label %bb14 ; IR: bb21: ; IR: %tmp12 = icmp slt i32 %tmp11, 9 -; IR-NEXT: %17 = xor i1 %tmp12, true -; IR-NEXT: %18 = call i64 
@llvm.amdgcn.if.break(i1 %17, i64 %phi.broken) +; IR-NEXT: %18 = xor i1 %tmp12, true ; IR-NEXT: br label %Flow1 ; IR: Flow2: -; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %15) -; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if(i1 %14) +; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %16) +; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if(i1 %15) ; IR-NEXT: %20 = extractvalue { i1, i64 } %19, 0 ; IR-NEXT: %21 = extractvalue { i1, i64 } %19, 1 ; IR-NEXT: br i1 %20, label %bb31.loopexit, label %Flow3 |