diff options
| author | Artem Belevich <tra@google.com> | 2018-05-11 21:13:19 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2018-05-11 21:13:19 +0000 |
| commit | c2cd5d5ce053b49c1a75216a8e9c7b4f298319ad (patch) | |
| tree | fe4cdac48ccc2013e5052da5b6b206d9c3963ec7 /llvm/test | |
| parent | f108a8fee10fb189faff4b2edbecbec8b6322a58 (diff) | |
| download | bcm5719-llvm-c2cd5d5ce053b49c1a75216a8e9c7b4f298319ad.tar.gz bcm5719-llvm-c2cd5d5ce053b49c1a75216a8e9c7b4f298319ad.zip | |
[Split GEP] handle trunc() in separate-const-offset-from-gep pass.
Let the separate-const-offset-from-gep pass handle trunc() when it calculates
the constant offset relative to the base. The pass itself may insert trunc()
instructions when it canonicalises array indices to pointer-size integers,
so it needs to handle trunc() in order to evaluate the offset.
Differential Revision: https://reviews.llvm.org/D46732
llvm-svn: 332142
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll | 10 | ||||
| -rw-r--r-- | llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll | 39 |
2 files changed, 35 insertions, 14 deletions
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll index 0b65035117a..4f9e0ec88ad 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll @@ -1,5 +1,8 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX -; RUN: opt < %s -S -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -gvn | FileCheck %s --check-prefix=IR +; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_20 \ +; RUN: | FileCheck %s --check-prefix=PTX +; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -S -separate-const-offset-from-gep \ +; RUN: -reassociate-geps-verify-no-dead-code -gvn \ +; RUN: | FileCheck %s --check-prefix=IR ; Verifies the SeparateConstOffsetFromGEP pass. ; The following code computes @@ -12,9 +15,6 @@ ; ; so the backend can emit PTX that uses fewer virtual registers. -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-unknown-unknown" - @array = internal addrspace(3) constant [32 x [32 x float]] zeroinitializer, align 4 define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) { diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index eeeac196374..917e0587ae9 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -1,14 +1,10 @@ -; RUN: opt < %s -separate-const-offset-from-gep -reassociate-geps-verify-no-dead-code -S | FileCheck %s +; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -separate-const-offset-from-gep \ +; RUN: -reassociate-geps-verify-no-dead-code -S | FileCheck %s ; Several unit tests for -separate-const-offset-from-gep. 
The transformation ; heavily relies on TargetTransformInfo, so we put these tests under ; target-specific folders. -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; target triple is necessary; otherwise TargetTransformInfo rejects any -; addressing mode. -target triple = "nvptx64-unknown-unknown" - %struct.S = type { float, double } @struct_array = global [1024 x %struct.S] zeroinitializer, align 16 @@ -271,9 +267,34 @@ entry: ; CHECK-NOT: add %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 ; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1 -; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8* -; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64 -; CHECK: bitcast +; CHECK: getelementptr inbounds %struct2, %struct2* [[PTR]], i64 -3 + ret %struct2* %ptr2 +; CHECK-NEXT: ret +} + +; Check that we can see through explicit trunc() instruction. +define %struct2* @trunk_explicit(%struct0* %ptr, i64 %idx) { +; CHECK-LABEL: @trunk_explicit( +entry: + %idx0 = trunc i64 1 to i32 + %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i32 %idx0, i32 3, i64 %idx, i32 1 +; CHECK-NOT: trunc +; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1 +; CHECK: getelementptr inbounds %struct2, %struct2* %0, i64 151 + ret %struct2* %ptr2 +; CHECK-NEXT: ret +} + +; Check that we can deal with trunc inserted by +; canonicalizeArrayIndicesToPointerSize() if size of an index is larger than +; that of the pointer. 
+define %struct2* @trunk_long_idx(%struct0* %ptr, i64 %idx) { +; CHECK-LABEL: @trunk_long_idx( +entry: + %ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i65 1, i32 3, i64 %idx, i32 1 +; CHECK-NOT: trunc +; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1 +; CHECK: getelementptr inbounds %struct2, %struct2* %0, i64 151 ret %struct2* %ptr2 ; CHECK-NEXT: ret } |

