diff options
| author | Eric Christopher <echristo@gmail.com> | 2019-04-17 02:12:23 +0000 |
|---|---|---|
| committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 02:12:23 +0000 |
| commit | a86343512845c9c1fdbac865fea88aa5fce7142a (patch) | |
| tree | 666fc6353de19ad8b00e56b67edd33f24104e4a7 /llvm/test/Transforms/StraightLineStrengthReduce | |
| parent | 7f8ca6e3679b3af951cb7a4b1377edfaa3244b93 (diff) | |
| download | bcm5719-llvm-a86343512845c9c1fdbac865fea88aa5fce7142a.tar.gz bcm5719-llvm-a86343512845c9c1fdbac865fea88aa5fce7142a.zip | |
Temporarily Revert "Add basic loop fusion pass."
As it's causing some bot failures (and per request from kbarton).
This reverts commit r358543/ab70da07286e618016e78247e4a24fcb84077fda.
llvm-svn: 358546
Diffstat (limited to 'llvm/test/Transforms/StraightLineStrengthReduce')
11 files changed, 0 insertions, 832 deletions
diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg deleted file mode 100644 index 2a665f06be7..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'AMDGPU' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll deleted file mode 100644 index f587a93bf1e..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/pr23975.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: opt < %s -slsr -S | FileCheck %s - -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" -target triple = "amdgcn--" - -%struct.Matrix4x4 = type { [4 x [4 x float]] } - -; Function Attrs: nounwind -define fastcc void @Accelerator_Intersect(%struct.Matrix4x4 addrspace(1)* nocapture readonly %leafTransformations) #0 { -; CHECK-LABEL: @Accelerator_Intersect( -entry: - %tmp = sext i32 undef to i64 - %arrayidx114 = getelementptr inbounds %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp - %tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0 -; CHECK: %tmp1 = getelementptr %struct.Matrix4x4, %struct.Matrix4x4 addrspace(1)* %leafTransformations, i64 %tmp, i32 0, i64 0, i64 0 - %tmp2 = load <4 x float>, <4 x float> addrspace(1)* undef, align 4 - ret void -} - -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-cpu"="tahiti" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll deleted file mode 100644 index 9554ae69031..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll +++ /dev/null @@ -1,107 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-- -separate-const-offset-from-gep -slsr -gvn < %s | FileCheck %s - -target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" - - -; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset( -; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]] -; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]] -define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 1023 - %tmp = sext i32 %j1 to i64 - %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp - %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)* - %v11 = load i32, i32 addrspace(1)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 1023 - %tmp5 = sext i32 %j2 to i64 - %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 - %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)* - %v22 = load i32, i32 addrspace(1)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} - -; CHECK-LABEL: @slsr_after_reassociate_global_geps_over_mubuf_max_offset( -; CHECK: %j1 = add nsw i32 %i, 1024 -; CHECK: %tmp = sext i32 %j1 to i64 -; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp -; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 -define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 1024 - %tmp = sext i32 %j1 to i64 - %p1 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp - %tmp3 = bitcast float addrspace(1)* %p1 to i32 addrspace(1)* - %v11 = load i32, i32 addrspace(1)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 1024 - %tmp5 = sext i32 %j2 to i64 - %p2 = getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 - %tmp6 = bitcast float addrspace(1)* %p2 to i32 addrspace(1)* - %v22 = load i32, i32 addrspace(1)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} - -; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset( -; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i -; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383 - -; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i -; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383 -define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 16383 - %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 - %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)* - %v11 = load i32, i32 addrspace(3)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 16383 - %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 - %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)* - %v22 = load i32, i32 addrspace(3)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} - -; CHECK-LABEL: @slsr_after_reassociate_lds_geps_over_ds_max_offset( -; CHECK: %j1 = add nsw i32 %i, 16384 -; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 -; CHECK: %j2 = add i32 %j1, %i -; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 -define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { -bb: - %i2 = shl nsw i32 %i, 1 - %j1 = add nsw i32 %i, 16384 - %p1 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 - %tmp3 = bitcast float addrspace(3)* %p1 to i32 addrspace(3)* - %v11 = load i32, i32 addrspace(3)* %tmp3, align 4 - %tmp4 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v11, i32 addrspace(1)* %tmp4, align 4 - - %j2 = add nsw i32 %i2, 16384 - %p2 = getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 - %tmp6 = bitcast float addrspace(3)* %p2 to i32 addrspace(3)* - %v22 = load i32, i32 addrspace(3)* %tmp6, align 4 - %tmp7 = bitcast float addrspace(1)* %out to i32 addrspace(1)* - store i32 %v22, i32 addrspace(1)* %tmp7, align 4 - - ret void -} diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/lit.local.cfg b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/lit.local.cfg deleted file mode 100644 index 2cb98eb371b..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'NVPTX' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll deleted file mode 100644 index 03c0356eb5b..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/reassociate-geps-and-slsr.ll +++ /dev/null @@ -1,74 +0,0 @@ -; RUN: opt < %s -separate-const-offset-from-gep -slsr -gvn -S | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=PTX - -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-unknown-unknown" - -; arr[i + 5] -; arr[i * 2 + 5] -; arr[i * 3 + 5] -; arr[i * 4 + 5] -; -; => reassociate-geps -; -; *(&arr[i] + 5) -; *(&arr[i * 2] + 5) -; *(&arr[i * 3] + 5) -; *(&arr[i * 4] + 5) -; -; => slsr -; -; p1 = &arr[i] -; *(p1 + 5) -; p2 = p1 + i -; *(p2 + 5) -; p3 = p2 + i -; *(p3 + 5) -; p4 = p3 + i -; *(p4 + 5) -define void @slsr_after_reassociate_geps(float* %arr, i32 %i) { -; CHECK-LABEL: @slsr_after_reassociate_geps( -; PTX-LABEL: .visible .func slsr_after_reassociate_geps( -; PTX: ld.param.u64 [[arr:%rd[0-9]+]], [slsr_after_reassociate_geps_param_0]; -; PTX: ld.param.u32 [[i:%r[0-9]+]], [slsr_after_reassociate_geps_param_1]; - %i2 = shl nsw i32 %i, 1 - %i3 = mul nsw i32 %i, 3 - %i4 = shl nsw i32 %i, 2 - - %j1 = add nsw i32 %i, 5 - %p1 = getelementptr inbounds float, float* %arr, i32 %j1 -; CHECK: [[b1:%[0-9]+]] = getelementptr float, float* %arr, i64 [[bump:%[0-9]+]] -; PTX: mul.wide.s32 [[i4:%rd[0-9]+]], [[i]], 4; -; PTX: add.s64 [[base1:%rd[0-9]+]], [[arr]], [[i4]]; - %v1 = load float, float* %p1, align 4 -; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base1]]+20]; - call void @foo(float %v1) - - %j2 = add nsw i32 %i2, 5 - %p2 = getelementptr inbounds float, float* %arr, i32 %j2 -; CHECK: [[b2:%[0-9]+]] = getelementptr float, float* [[b1]], i64 [[bump]] -; PTX: add.s64 [[base2:%rd[0-9]+]], [[base1]], [[i4]]; - %v2 = load float, float* %p2, align 4 -; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base2]]+20]; - call void @foo(float %v2) - - %j3 = add nsw i32 %i3, 5 - %p3 = getelementptr inbounds float, float* %arr, i32 %j3 -; CHECK: [[b3:%[0-9]+]] = getelementptr float, float* [[b2]], i64 [[bump]] -; PTX: add.s64 [[base3:%rd[0-9]+]], [[base2]], [[i4]]; - %v3 = load float, float* %p3, align 4 -; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base3]]+20]; - call void @foo(float %v3) - - %j4 = add nsw i32 %i4, 5 - %p4 = getelementptr inbounds float, float* %arr, i32 %j4 -; CHECK: [[b4:%[0-9]+]] = getelementptr float, float* [[b3]], i64 [[bump]] -; PTX: add.s64 [[base4:%rd[0-9]+]], [[base3]], [[i4]]; - %v4 = load float, float* %p4, align 4 -; PTX: ld.f32 {{%f[0-9]+}}, {{\[}}[[base4]]+20]; - call void @foo(float %v4) - - ret void -} - -declare void @foo(float) diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll deleted file mode 100644 index cb73565b152..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll +++ /dev/null @@ -1,71 +0,0 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -; CUDA code -; __global__ void foo(int b, int s) { -; #pragma unroll -; for (int i = 0; i < 4; ++i) { -; if (cond(i)) -; use((b + i) * s); -; } -; } -define void @foo(i32 %b, i32 %s) { -; CHECK-LABEL: .visible .entry foo( -entry: -; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1]; -; CHECK: ld.param.u32 [[b:%r[0-9]+]], [foo_param_0]; - %call = tail call zeroext i1 @cond(i32 0) - br i1 %call, label %if.then, label %for.inc - -if.then: ; preds = %entry - %mul = mul nsw i32 %b, %s -; CHECK: mul.lo.s32 [[a0:%r[0-9]+]], [[b]], [[s]] - tail call void @use(i32 %mul) - br label %for.inc - -for.inc: ; preds = %entry, %if.then - %call.1 = tail call zeroext i1 @cond(i32 1) - br i1 %call.1, label %if.then.1, label %for.inc.1 - -if.then.1: ; preds = %for.inc - %add.1 = add nsw i32 %b, 1 - %mul.1 = mul nsw i32 %add.1, %s -; CHECK: add.s32 [[a1:%r[0-9]+]], [[a0]], [[s]] - tail call void @use(i32 %mul.1) - br label %for.inc.1 - -for.inc.1: ; preds = %if.then.1, %for.inc - %call.2 = tail call zeroext i1 @cond(i32 2) - br i1 %call.2, label %if.then.2, label %for.inc.2 - -if.then.2: ; preds = %for.inc.1 - %add.2 = add nsw i32 %b, 2 - %mul.2 = mul nsw i32 %add.2, %s -; CHECK: add.s32 [[a2:%r[0-9]+]], [[a1]], [[s]] - tail call void @use(i32 %mul.2) - br label %for.inc.2 - -for.inc.2: ; preds = %if.then.2, %for.inc.1 - %call.3 = tail call zeroext i1 @cond(i32 3) - br i1 %call.3, label %if.then.3, label %for.inc.3 - -if.then.3: ; preds = %for.inc.2 - %add.3 = add nsw i32 %b, 3 - %mul.3 = mul nsw i32 %add.3, %s -; CHECK: add.s32 [[a3:%r[0-9]+]], [[a2]], [[s]] - tail call void @use(i32 %mul.3) - br label %for.inc.3 - -for.inc.3: ; preds = %if.then.3, %for.inc.2 - ret void -} - -declare zeroext i1 @cond(i32) - -declare void @use(i32) - -!nvvm.annotations = !{!0} - -!0 = !{void (i32, i32)* @foo, !"kernel", i32 1} diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/X86/lit.local.cfg b/llvm/test/Transforms/StraightLineStrengthReduce/X86/lit.local.cfg deleted file mode 100644 index c8625f4d9d2..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/X86/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'X86' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll b/llvm/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll deleted file mode 100644 index f11cbc5897a..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/X86/no-slsr.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: opt < %s -slsr -gvn -S | FileCheck %s - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Do not perform SLSR on &input[s] and &input[s * 2] which fit into addressing -; modes of X86. -define i32 @no_slsr_gep(i32* %input, i64 %s) { -; CHECK-LABEL: @no_slsr_gep( - ; v0 = input[0]; - %p0 = getelementptr inbounds i32, i32* %input, i64 0 - %v0 = load i32, i32* %p0 - - ; v1 = input[s]; - %p1 = getelementptr inbounds i32, i32* %input, i64 %s -; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s - %v1 = load i32, i32* %p1 - - ; v2 = input[s * 2]; - %s2 = mul nsw i64 %s, 2 - %p2 = getelementptr inbounds i32, i32* %input, i64 %s2 -; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i64 %s2 - %v2 = load i32, i32* %p2 - - ; return v0 + v1 + v2; - %1 = add i32 %v0, %v1 - %2 = add i32 %1, %v2 - ret i32 %2 -} - -define void @no_slsr_add(i32 %b, i32 %s) { -; CHECK-LABEL: @no_slsr_add( - %1 = add i32 %b, %s -; CHECK: add i32 %b, %s - call void @foo(i32 %1) - %s2 = mul i32 %s, 2 -; CHECK: %s2 = mul i32 %s, 2 - %2 = add i32 %b, %s2 -; CHECK: add i32 %b, %s2 - call void @foo(i32 %2) - ret void -} - -declare void @foo(i32 %a) diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll deleted file mode 100644 index 92af617dab8..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll +++ /dev/null @@ -1,172 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -slsr -gvn -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" - -define void @shl(i32 %b, i32 %s) { -; CHECK-LABEL: @shl( -; CHECK-NEXT: [[T1:%.*]] = add i32 [[B:%.*]], [[S:%.*]] -; CHECK-NEXT: call void @foo(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = add i32 [[T1]], [[S]] -; CHECK-NEXT: call void @foo(i32 [[T2]]) -; CHECK-NEXT: ret void -; - %t1 = add i32 %b, %s - call void @foo(i32 %t1) - %s2 = shl i32 %s, 1 - %t2 = add i32 %b, %s2 - call void @foo(i32 %t2) - ret void -} - -define void @stride_is_2s(i32 %b, i32 %s) { -; CHECK-LABEL: @stride_is_2s( -; CHECK-NEXT: [[S2:%.*]] = shl i32 [[S:%.*]], 1 -; CHECK-NEXT: [[T1:%.*]] = add i32 [[B:%.*]], [[S2]] -; CHECK-NEXT: call void @foo(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = add i32 [[T1]], [[S2]] -; CHECK-NEXT: call void @foo(i32 [[T2]]) -; CHECK-NEXT: [[T3:%.*]] = add i32 [[T2]], [[S2]] -; CHECK-NEXT: call void @foo(i32 [[T3]]) -; CHECK-NEXT: ret void -; - %s2 = shl i32 %s, 1 - %t1 = add i32 %b, %s2 - call void @foo(i32 %t1) - %s4 = shl i32 %s, 2 - %t2 = add i32 %b, %s4 - call void @foo(i32 %t2) - %s6 = mul i32 %s, 6 - %t3 = add i32 %b, %s6 - call void @foo(i32 %t3) - ret void -} - -define void @stride_is_3s(i32 %b, i32 %s) { -; CHECK-LABEL: @stride_is_3s( -; CHECK-NEXT: [[T1:%.*]] = add i32 [[S:%.*]], [[B:%.*]] -; CHECK-NEXT: call void @foo(i32 [[T1]]) -; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[S]], 3 -; CHECK-NEXT: [[T2:%.*]] = add i32 [[T1]], [[TMP1]] -; CHECK-NEXT: call void @foo(i32 [[T2]]) -; CHECK-NEXT: [[T3:%.*]] = add i32 [[T2]], [[TMP1]] -; CHECK-NEXT: call void @foo(i32 [[T3]]) -; CHECK-NEXT: ret void -; - %t1 = add i32 %s, %b - call void @foo(i32 %t1) - %s4 = shl i32 %s, 2 - %t2 = add i32 %s4, %b - call void @foo(i32 %t2) - %s7 = mul i32 %s, 7 - %t3 = add i32 %s7, %b - call void @foo(i32 %t3) - ret void -} - -; foo(b + 6 * s); -; foo(b + 4 * s); -; foo(b + 2 * s); -; => -; t1 = b + 6 * s; -; foo(t1); -; s2 = 2 * s; -; t2 = t1 - s2; -; foo(t2); -; t3 = t2 - s2; -; foo(t3); -define void @stride_is_minus_2s(i32 %b, i32 %s) { -; CHECK-LABEL: @stride_is_minus_2s( -; CHECK-NEXT: [[S6:%.*]] = mul i32 [[S:%.*]], 6 -; CHECK-NEXT: [[T1:%.*]] = add i32 [[B:%.*]], [[S6]] -; CHECK-NEXT: call void @foo(i32 [[T1]]) -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[S]], 1 -; CHECK-NEXT: [[T2:%.*]] = sub i32 [[T1]], [[TMP1]] -; CHECK-NEXT: call void @foo(i32 [[T2]]) -; CHECK-NEXT: [[T3:%.*]] = sub i32 [[T2]], [[TMP1]] -; CHECK-NEXT: call void @foo(i32 [[T3]]) -; CHECK-NEXT: ret void -; - %s6 = mul i32 %s, 6 - %t1 = add i32 %b, %s6 - call void @foo(i32 %t1) - %s4 = shl i32 %s, 2 - %t2 = add i32 %b, %s4 - call void @foo(i32 %t2) - %s2 = shl i32 %s, 1 - %t3 = add i32 %b, %s2 - call void @foo(i32 %t3) - ret void -} - -; TODO: This pass is targeted at simple address-calcs, so it is artificially limited to -; match scalar values. The code could be modified to handle vector types too. - -define void @stride_is_minus_2s_vec(<2 x i32> %b, <2 x i32> %s) { -; CHECK-LABEL: @stride_is_minus_2s_vec( -; CHECK-NEXT: [[S6:%.*]] = mul <2 x i32> [[S:%.*]], <i32 6, i32 6> -; CHECK-NEXT: [[T1:%.*]] = add <2 x i32> [[B:%.*]], [[S6]] -; CHECK-NEXT: call void @voo(<2 x i32> [[T1]]) -; CHECK-NEXT: [[S4:%.*]] = shl <2 x i32> [[S]], <i32 2, i32 2> -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[B]], [[S4]] -; CHECK-NEXT: call void @voo(<2 x i32> [[T2]]) -; CHECK-NEXT: [[S2:%.*]] = shl <2 x i32> [[S]], <i32 1, i32 1> -; CHECK-NEXT: [[T3:%.*]] = add <2 x i32> [[B]], [[S2]] -; CHECK-NEXT: call void @voo(<2 x i32> [[T3]]) -; CHECK-NEXT: ret void -; - %s6 = mul <2 x i32> %s, <i32 6, i32 6> - %t1 = add <2 x i32> %b, %s6 - call void @voo(<2 x i32> %t1) - %s4 = shl <2 x i32> %s, <i32 2, i32 2> - %t2 = add <2 x i32> %b, %s4 - call void @voo(<2 x i32> %t2) - %s2 = shl <2 x i32> %s, <i32 1, i32 1> - %t3 = add <2 x i32> %b, %s2 - call void @voo(<2 x i32> %t3) - ret void -} - -; t = b + (s << 3); -; foo(t); -; foo(b + s); -; -; do not rewrite b + s to t - 7 * s because the latter is more complicated. -define void @simple_enough(i32 %b, i32 %s) { -; CHECK-LABEL: @simple_enough( -; CHECK-NEXT: [[S8:%.*]] = shl i32 [[S:%.*]], 3 -; CHECK-NEXT: [[T1:%.*]] = add i32 [[B:%.*]], [[S8]] -; CHECK-NEXT: call void @foo(i32 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = add i32 [[B]], [[S]] -; CHECK-NEXT: call void @foo(i32 [[T2]]) -; CHECK-NEXT: ret void -; - %s8 = shl i32 %s, 3 - %t1 = add i32 %b, %s8 - call void @foo(i32 %t1) - %t2 = add i32 %b, %s - call void @foo(i32 %t2) - ret void -} - -define void @slsr_strided_add_128bit(i128 %b, i128 %s) { -; CHECK-LABEL: @slsr_strided_add_128bit( -; CHECK-NEXT: [[S125:%.*]] = shl i128 [[S:%.*]], 125 -; CHECK-NEXT: [[T1:%.*]] = add i128 [[B:%.*]], [[S125]] -; CHECK-NEXT: call void @bar(i128 [[T1]]) -; CHECK-NEXT: [[T2:%.*]] = add i128 [[T1]], [[S125]] -; CHECK-NEXT: call void @bar(i128 [[T2]]) -; CHECK-NEXT: ret void -; - %s125 = shl i128 %s, 125 - %s126 = shl i128 %s, 126 - %t1 = add i128 %b, %s125 - call void @bar(i128 %t1) - %t2 = add i128 %b, %s126 - call void @bar(i128 %t2) - ret void -} - -declare void @foo(i32) -declare void @voo(<2 x i32>) -declare void @bar(i128) diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll deleted file mode 100644 index b9bb4faf1b4..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll +++ /dev/null @@ -1,191 +0,0 @@ -; RUN: opt < %s -slsr -gvn -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32" - -; foo(input[0]); -; foo(input[s]); -; foo(input[s * 2]); -; => -; p0 = &input[0]; -; foo(*p); -; p1 = p0 + s; -; foo(*p1); -; p2 = p1 + s; -; foo(*p2); -define void @slsr_gep(i32* %input, i64 %s) { -; CHECK-LABEL: @slsr_gep( - ; v0 = input[0]; - %p0 = getelementptr inbounds i32, i32* %input, i64 0 - call void @foo(i32* %p0) - - ; v1 = input[s]; - %p1 = getelementptr inbounds i32, i32* %input, i64 %s -; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s - call void @foo(i32* %p1) - - ; v2 = input[s * 2]; - %s2 = shl nsw i64 %s, 1 - %p2 = getelementptr inbounds i32, i32* %input, i64 %s2 -; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %s - call void @foo(i32* %p2) - - ret void -} - -; foo(input[0]); -; foo(input[(long)s]); -; foo(input[(long)(s * 2)]); -; => -; p0 = &input[0]; -; foo(*p); -; p1 = p0 + (long)s; -; foo(*p1); -; p2 = p1 + (long)s; -; foo(*p2); -define void @slsr_gep_sext(i32* %input, i32 %s) { -; CHECK-LABEL: @slsr_gep_sext( - ; v0 = input[0]; - %p0 = getelementptr inbounds i32, i32* %input, i64 0 - call void @foo(i32* %p0) - - ; v1 = input[s]; - %t = sext i32 %s to i64 - %p1 = getelementptr inbounds i32, i32* %input, i64 %t -; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %t - call void @foo(i32* %p1) - - ; v2 = input[s * 2]; - %s2 = shl nsw i32 %s, 1 - %t2 = sext i32 %s2 to i64 - %p2 = getelementptr inbounds i32, i32* %input, i64 %t2 -; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %t - call void @foo(i32* %p2) - - ret void -} - -; int input[10][5]; -; foo(input[s][t]); -; foo(input[s * 2][t]); -; foo(input[s * 3][t]); -; => -; p0 = &input[s][t]; -; foo(*p0); -; p1 = p0 + 5s; -; foo(*p1); -; p2 = p1 + 5s; -; foo(*p2); -define void @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) { -; CHECK-LABEL: @slsr_gep_2d( - ; v0 = input[s][t]; - %p0 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s, i64 %t - call void @foo(i32* %p0) - - ; v1 = input[s * 2][t]; - %s2 = shl nsw i64 %s, 1 -; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 5 - %p1 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s2, i64 %t -; CHECK: %p1 = getelementptr inbounds i32, i32* %p0, i64 [[BUMP]] - call void @foo(i32* %p1) - - ; v3 = input[s * 3][t]; - %s3 = mul nsw i64 %s, 3 - %p2 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s3, i64 %t -; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 [[BUMP]] - call void @foo(i32* %p2) - - ret void -} - -%struct.S = type <{ i64, i32 }> - -; In this case, the bump -; = (char *)&input[s * 2][t].f1 - (char *)&input[s][t].f1 -; = 60 * s -; which may not be divisible by typeof(input[s][t].f1) = 8. Therefore, we -; rewrite the candidates using byte offset instead of index offset as in -; @slsr_gep_2d. -define void @slsr_gep_uglygep([10 x [5 x %struct.S]]* %input, i64 %s, i64 %t) { -; CHECK-LABEL: @slsr_gep_uglygep( - ; v0 = input[s][t].f1; - %p0 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s, i64 %t, i32 0 - call void @bar(i64* %p0) - - ; v1 = input[s * 2][t].f1; - %s2 = shl nsw i64 %s, 1 -; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 60 - %p1 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s2, i64 %t, i32 0 -; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]] - call void @bar(i64* %p1) - - ; v2 = input[s * 3][t].f1; - %s3 = mul nsw i64 %s, 3 - %p2 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s3, i64 %t, i32 0 -; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]] - call void @bar(i64* %p2) - - ret void -} - -define void @slsr_out_of_bounds_gep(i32* %input, i32 %s) { -; CHECK-LABEL: @slsr_out_of_bounds_gep( - ; v0 = input[0]; - %p0 = getelementptr i32, i32* %input, i64 0 - call void @foo(i32* %p0) - - ; v1 = input[(long)s]; - %t = sext i32 %s to i64 - %p1 = getelementptr i32, i32* %input, i64 %t -; CHECK: %p1 = getelementptr i32, i32* %input, i64 %t - call void @foo(i32* %p1) - - ; v2 = input[(long)(s * 2)]; - %s2 = shl nsw i32 %s, 1 - %t2 = sext i32 %s2 to i64 - %p2 = getelementptr i32, i32* %input, i64 %t2 -; CHECK: %p2 = getelementptr i32, i32* %p1, i64 %t - call void @foo(i32* %p2) - - ret void -} - -define void @slsr_gep_128bit_index(i32* %input, i128 %s) { -; CHECK-LABEL: @slsr_gep_128bit_index( - ; p0 = &input[0] - %p0 = getelementptr inbounds i32, i32* %input, i128 0 - call void @foo(i32* %p0) - - ; p1 = &input[s << 125] - %s125 = shl nsw i128 %s, 125 - %p1 = getelementptr inbounds i32, i32* %input, i128 %s125 -; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i128 %s125 - call void @foo(i32* %p1) - - ; p2 = &input[s << 126] - %s126 = shl nsw i128 %s, 126 - %p2 = getelementptr inbounds i32, i32* %input, i128 %s126 -; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i128 %s126 - call void @foo(i32* %p2) - - ret void -} - -define void @slsr_gep_32bit_pointer(i32 addrspace(1)* %input, i64 %s) { -; CHECK-LABEL: @slsr_gep_32bit_pointer( - ; p1 = &input[s] - %p1 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s - call void @baz(i32 addrspace(1)* %p1) - - ; p2 = &input[s * 2] - %s2 = mul nsw i64 %s, 2 - %p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2 - ; %s2 is wider than the pointer size of addrspace(1), so do not factor it. -; CHECK: %p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2 - call void @baz(i32 addrspace(1)* %p2) - - ret void -} - -declare void @foo(i32*) -declare void @bar(i64*) -declare void @baz(i32 addrspace(1)*) diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll deleted file mode 100644 index 56b1d1fc5cd..00000000000 --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-mul.ll +++ /dev/null @@ -1,147 +0,0 @@ -; RUN: opt < %s -slsr -gvn -S | FileCheck %s - -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" - -define void @slsr1(i32 %b, i32 %s) { -; CHECK-LABEL: @slsr1( - ; foo(b * s); - %mul0 = mul i32 %b, %s -; CHECK: mul i32 -; CHECK-NOT: mul i32 - call void @foo(i32 %mul0) - - ; foo((b + 1) * s); - %b1 = add i32 %b, 1 - %mul1 = mul i32 %b1, %s - call void @foo(i32 %mul1) - - ; foo((b + 2) * s); - %b2 = add i32 %b, 2 - %mul2 = mul i32 %b2, %s - call void @foo(i32 %mul2) - - ret void -} - -define void @non_canonicalized(i32 %b, i32 %s) { -; CHECK-LABEL: @non_canonicalized( - ; foo(b * s); - %mul0 = mul i32 %b, %s -; CHECK: mul i32 -; CHECK-NOT: mul i32 - call void @foo(i32 %mul0) - - ; foo((1 + b) * s); - %b1 = add i32 1, %b - %mul1 = mul i32 %b1, %s - call void @foo(i32 %mul1) - - ; foo((2 + b) * s); - %b2 = add i32 2, %b - %mul2 = mul i32 %b2, %s - call void @foo(i32 %mul2) - - ret void -} - -define void @or(i32 %a, i32 %s) { - %b = shl i32 %a, 1 -; CHECK-LABEL: @or( - ; foo(b * s); - %mul0 = mul i32 %b, %s -; CHECK: [[base:[^ ]+]] = mul i32 - call void @foo(i32 %mul0) - - ; foo((b | 1) * s); - %b1 = or i32 %b, 1 - %mul1 = mul i32 %b1, %s -; CHECK: add i32 [[base]], %s - call void @foo(i32 %mul1) - - ; foo((b | 2) * s); - %b2 = or i32 %b, 2 - %mul2 = mul i32 %b2, %s -; CHECK: mul i32 %b2, %s - call void @foo(i32 %mul2) - - ret void -} - -; foo(a * b) -; foo((a + 1) * b) -; foo(a * (b + 1)) -; foo((a + 1) * (b + 1)) -define void @slsr2(i32 %a, i32 %b) { -; CHECK-LABEL: @slsr2( - %a1 = add i32 %a, 1 - %b1 = add i32 %b, 1 - %mul0 = mul i32 %a, %b -; CHECK: mul i32 -; CHECK-NOT: mul i32 - %mul1 = mul i32 %a1, %b - %mul2 = mul i32 %a, %b1 - %mul3 = mul i32 %a1, %b1 - - call void @foo(i32 %mul0) - call void @foo(i32 %mul1) - call void @foo(i32 %mul2) - call void @foo(i32 %mul3) - - ret void -} - -; The bump is a multiple of the stride. -; -; foo(b * s); -; foo((b + 2) * s); -; foo((b + 4) * s); -; => -; mul0 = b * s; -; bump = s * 2; -; mul1 = mul0 + bump; // GVN ensures mul1 and mul2 use the same bump. -; mul2 = mul1 + bump; -define void @slsr3(i32 %b, i32 %s) { -; CHECK-LABEL: @slsr3( - %mul0 = mul i32 %b, %s -; CHECK: mul i32 - call void @foo(i32 %mul0) - - %b1 = add i32 %b, 2 - %mul1 = mul i32 %b1, %s -; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = shl i32 %s, 1 -; CHECK: %mul1 = add i32 %mul0, [[BUMP]] - call void @foo(i32 %mul1) - - %b2 = add i32 %b, 4 - %mul2 = mul i32 %b2, %s -; CHECK: %mul2 = add i32 %mul1, [[BUMP]] - call void @foo(i32 %mul2) - - ret void -} - -; Do not rewrite a candidate if its potential basis does not dominate it. -; -; if (cond) -; foo(a * b); -; foo((a + 1) * b); -define void @not_dominate(i1 %cond, i32 %a, i32 %b) { -; CHECK-LABEL: @not_dominate( -entry: - %a1 = add i32 %a, 1 - br i1 %cond, label %then, label %merge - -then: - %mul0 = mul i32 %a, %b -; CHECK: %mul0 = mul i32 %a, %b - call void @foo(i32 %mul0) - br label %merge - -merge: - %mul1 = mul i32 %a1, %b -; CHECK: %mul1 = mul i32 %a1, %b - call void @foo(i32 %mul1) - ret void -} - -declare void @foo(i32) |

