diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll | 64 |
3 files changed, 67 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 37b3d03d132..c70eb3d716c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -45,6 +45,7 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" #include <memory> @@ -678,6 +679,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() { } bool AMDGPUPassConfig::addPreISel() { + addPass(&LowerSwitchID); addPass(createFlattenCFGPass()); return false; } diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index f58f79f8b14..41edcb56aab 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -275,6 +275,7 @@ public: AU.addRequired<LoopInfoWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreservedID(LowerSwitchID); RegionPass::getAnalysisUsage(AU); } }; diff --git a/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll b/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll new file mode 100644 index 00000000000..e44f99cd950 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -print-after=si-annotate-control-flow %s -o /dev/null 2>&1 | FileCheck %s + +; CHECK-LABEL: @switch_unreachable_default + +define amdgpu_kernel void @switch_unreachable_default(i32 addrspace(1)* %out, i8 addrspace(1)* %in0, i8 addrspace(1)* %in1) #0 { +centry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + switch i32 %tid, label %sw.default [ + i32 0, label %sw.bb0 + i32 1, label %sw.bb1 + ] + +sw.bb0: + br label %sw.epilog + +sw.bb1: + br label %sw.epilog + +sw.default: + unreachable + +sw.epilog: + %ptr = phi i8 addrspace(1)* [%in0, %sw.bb0], [%in1, %sw.bb1] + %gep_in = getelementptr inbounds i8, i8 addrspace(1)* %ptr, i64 0 + br label %sw.while + +; The loop below is necessary to preserve the effect of the +; unreachable default on divergence analysis in the presence of other +; optimizations. The loop consists of a single block where the loop +; exit is divergent because it depends on the divergent phi at the +; start of the block. The checks below ensure that the loop exit is +; handled correctly as divergent. But the data-flow within the block +; is sensitive to optimizations; so we just ensure that the relevant +; operations in the block body are indeed in the same block. + +; CHECK: [[PHI:%[a-zA-Z0-9._]+]] = phi i64 +; CHECK-NOT: {{ br }} +; CHECK: load i8 +; CHECK-NOT: {{ br }} +; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq +; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break(i1 [[ICMP]], i64 [[PHI]]) +; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop(i64 [[IF]]) +; CHECK: br i1 [[LOOP]] + +sw.while: + %p = phi i8 addrspace(1)* [ %gep_in, %sw.epilog ], [ %incdec.ptr, %sw.while ] + %count = phi i32 [ 0, %sw.epilog ], [ %count.inc, %sw.while ] + %char = load i8, i8 addrspace(1)* %p, align 1 + %tobool = icmp eq i8 %char, 0 + %incdec.ptr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 1 + %count.inc = add i32 %count, 1 + br i1 %tobool, label %sw.exit, label %sw.while + +sw.exit: + %tid64 = zext i32 %tid to i64 + %gep_out = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid64 + store i32 %count, i32 addrspace(1)* %gep_out, align 4 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { convergent noinline optnone } |

