diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-08-15 00:53:06 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-08-15 00:53:06 +0000 |
| commit | 427a0fd22ef1910f2b5a2353f5b93d8397cf002f (patch) | |
| tree | 4bbb394ae16b644bdd200916d6647c2e1f408531 /llvm/test/Transforms/LoopStrengthReduce/AMDGPU | |
| parent | 3938f0c728b483fdb1123e7e756c81816d653892 (diff) | |
| download | bcm5719-llvm-427a0fd22ef1910f2b5a2353f5b93d8397cf002f.tar.gz bcm5719-llvm-427a0fd22ef1910f2b5a2353f5b93d8397cf002f.zip | |
LoopStrengthReduce: Try to pass address space to isLegalAddressingMode
This seems to only work some of the time. In some situations,
this seems to use a nonsensical type and isn't actually aware of the
memory being accessed. For example, if the branch condition is an icmp of a pointer,
it checks the addressing mode of i1.
llvm-svn: 245137
Diffstat (limited to 'llvm/test/Transforms/LoopStrengthReduce/AMDGPU')
3 files changed, 272 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll new file mode 100644 index 00000000000..bf61112a3c3 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll @@ -0,0 +1,156 @@ +; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s + +; Test that loops with different maximum offsets for different address +; spaces are correctly handled. + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32( +; OPT: {{^}}.lr.ph: +; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095 +; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1 +define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 4095 + %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1 + %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 
4 + %tmp7 = add nsw i32 %tmp6, %tmp4 + store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32( +; OPT: {{^}}.lr.ph.preheader: +; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096 +; OPT: br label %.lr.ph + +; OPT: {{^}}.lr.ph: +; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1 +define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 4096 + %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1 + %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1 + %tmp4 = sext i8 %tmp3 to i32 + %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4 + %tmp7 = add nsw i32 %tmp6, %tmp4 + store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32( +; OPT: 
{{^}}.lr.ph +; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535 +; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1 +define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 65535 + %tmp2 = trunc i64 %tmp1 to i32 + %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2 + %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1 + %tmp5 = sext i8 %tmp4 to i32 + %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp7, %tmp5 + store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32( +; OPT: {{^}}.lr.ph.preheader: +; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536 +; OPT: br label %.lr.ph + +; OPT: {{^}}.lr.ph: +; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] +; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1 +define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 
addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +bb: + %tmp = icmp sgt i32 %n, 0 + br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge + +.lr.ph.preheader: ; preds = %bb + br label %.lr.ph + +._crit_edge.loopexit: ; preds = %.lr.ph + br label %._crit_edge + +._crit_edge: ; preds = %._crit_edge.loopexit, %bb + ret void + +.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] + %tmp1 = add nuw nsw i64 %indvars.iv, 65536 + %tmp2 = trunc i64 %tmp1 to i32 + %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2 + %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1 + %tmp5 = sext i8 %tmp4 to i32 + %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv + %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp7, %tmp5 + store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..6baccf05fff --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll new file mode 100644 index 00000000000..bd80302a68b --- /dev/null 
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll @@ -0,0 +1,113 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s + +; Test various conditions where OptimizeLoopTermCond doesn't look at a +; memory instruction use and fails to find the address space. + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + +; CHECK-LABEL: @local_cmp_user( +; CHECK: bb11: +; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ -2, %entry ] +; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ] + +; CHECK: bb: +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 +; CHECK: %scevgep = getelementptr i8, i8 addrspace(3)* %t, i32 %lsr.iv.next2 +; CHECK: %c1 = icmp ult i8 addrspace(3)* %scevgep, undef +define void @local_cmp_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i32 %i, 1 + %c0 = icmp eq i32 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef + %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii + %c1 = icmp ult i8 addrspace(3)* %p, undef + %i.next = add i32 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} + +; CHECK-LABEL: @global_cmp_user( +; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, 2 +; CHECK: %scevgep = getelementptr i8, i8 addrspace(1)* %t, i64 %lsr.iv.next2 +define void @global_cmp_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i64 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i64 %i, 1 + %c0 = icmp eq i64 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef + %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii + %c1 = icmp ult i8 addrspace(1)* %p, 
undef + %i.next = add i64 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} + +; CHECK-LABEL: @global_gep_user( +; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv1 +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 +define void @global_gep_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i32 %i, 1 + %c0 = icmp eq i32 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef + %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii + %c1 = icmp ult i8 addrspace(1)* %p, undef + %i.next = add i32 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} + +; CHECK-LABEL: @global_sext_scale_user( +; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext +; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1 +; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2 +define void @global_sext_scale_user() nounwind { +entry: + br label %bb11 + +bb11: + %i = phi i32 [ 0, %entry ], [ %i.next, %bb ] + %ii = shl i32 %i, 1 + %ii.ext = sext i32 %ii to i64 + %c0 = icmp eq i32 %i, undef + br i1 %c0, label %bb13, label %bb + +bb: + %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef + %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext + %c1 = icmp ult i8 addrspace(1)* %p, undef + %i.next = add i32 %i, 1 + br i1 %c1, label %bb11, label %bb13 + +bb13: + unreachable +} |

