path: root/llvm/test/Transforms/LoadStoreVectorizer
author     Alina Sbirlea <asbirlea@google.com>    2016-08-04 16:38:44 +0000
committer  Alina Sbirlea <asbirlea@google.com>    2016-08-04 16:38:44 +0000
commit     6f937b1144aeaf93ee57bdfaeb13d6cbbde5850d (patch)
tree       0783be537dc51123711822441ed6769bbcd5c7c9 /llvm/test/Transforms/LoadStoreVectorizer
parent     98d78405b0cc2a71f703c5f8fa6a694f6136056e (diff)
LoadStoreVectorizer: Remove TargetBaseAlign. Keep alignment for stack adjustments.
Summary:
TargetBaseAlign is no longer required, since the LSV checks whether the target allows misaligned accesses. A constant defining a base alignment is still needed for stack accesses, where the alignment can be adjusted.

The previous patch (D22936) was reverted because tests were failing. This patch also fixes the cause of those failures:
- The failing x86 tests either did not have the right target or the right alignment.
- The failing NVPTX tests did not have the right alignment.
- The failing AMDGPU test (merge-stores) should allow vectorization with the given alignment, but the target info considers <3 x i32> a non-standard type and gives up early. This patch removes that condition, only checks for a maximum allowed size, and relies on the next condition checking for %4 for correctness. This should be revisited to include 3xi32 as an MVT type (on arsenm's non-immediate todo list).

Note that querying sizeInBits on an MVT is undefined for such types (it leads to an assertion failure), so we need to create an EVT instead, hence the interface change in allowsMisaligned to include the Context.

Reviewers: arsenm, jlebar, tstellarAMD

Subscribers: jholewinski, arsenm, mzolotukhin, llvm-commits

Differential Revision: https://reviews.llvm.org/D23068

llvm-svn: 277735
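The MVT/EVT distinction mentioned above is the reason for the interface change. The following is a hypothetical, minimal sketch (queryWidth is an illustrative helper, not code from this patch): building an EVT for an arbitrary bit width such as 96 bits (<3 x i32>) requires an LLVMContext, whereas an MVT only exists for the fixed set of simple machine types, so asking for its size asserts for non-standard widths.

    // Hypothetical illustration, not code from this patch: why the alignment
    // query needs an LLVMContext.  A 96-bit access (<3 x i32>) has no simple
    // MVT, so asking an MVT for its size would hit an assertion; an EVT built
    // with the context handles arbitrary widths.
    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/IR/LLVMContext.h"

    static unsigned queryWidth(llvm::LLVMContext &Ctx, unsigned BitWidth) {
      // EVT::getIntegerVT uses the context to create extended (non-simple)
      // integer types when BitWidth does not match any simple MVT.
      llvm::EVT VT = llvm::EVT::getIntegerVT(Ctx, BitWidth);
      return VT.getSizeInBits(); // well-defined even for e.g. 96 bits
    }

Any hook that wants to make this kind of query for non-standard widths therefore has to be handed a context, which is what the allowsMisaligned interface change provides.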
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer')
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll | 32
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll         |  2
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll               |  2
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll            |  4
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll            | 20
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll        |  2
6 files changed, 31 insertions, 31 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
index baf67764190..72c13b4d12e 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/merge-across-side-effects.ll
@@ -22,9 +22,9 @@ declare void @fn_readnone() #5
define void @load_fn(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn()
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -35,9 +35,9 @@ define void @load_fn(i32* %p) #0 {
define void @load_fn_nounwind(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn_nounwind() #0
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -48,9 +48,9 @@ define void @load_fn_nounwind(i32* %p) #0 {
define void @load_fn_nounwind_writeonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn_nounwind_writeonly() #1
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -60,9 +60,9 @@ define void @load_fn_nounwind_writeonly(i32* %p) #0 {
define void @load_fn_nounwind_readonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn_nounwind_readonly() #2
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -73,9 +73,9 @@ define void @load_fn_nounwind_readonly(i32* %p) #0 {
define void @load_fn_readonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn_readonly() #4
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -86,9 +86,9 @@ define void @load_fn_readonly(i32* %p) #0 {
define void @load_fn_writeonly(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn_writeonly() #3
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -98,9 +98,9 @@ define void @load_fn_writeonly(i32* %p) #0 {
define void @load_fn_readnone(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- %v0 = load i32, i32* %p
+ %v0 = load i32, i32* %p, align 8
call void @fn_readnone() #5
- %v1 = load i32, i32* %p.1
+ %v1 = load i32, i32* %p.1, align 4
ret void
}
@@ -193,9 +193,9 @@ define void @store_fn_writeonly(i32* %p) #0 {
define void @store_fn_readnone(i32* %p) #0 {
%p.1 = getelementptr i32, i32* %p, i32 1
- store i32 0, i32* %p
+ store i32 0, i32* %p, align 8
call void @fn_readnone() #5
- store i32 0, i32* %p.1
+ store i32 0, i32* %p.1, align 8
ret void
}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
index c8c3c51dfb0..ff5e54f03ae 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/non-instr-bitcast.ll
@@ -6,7 +6,7 @@
define void @foo() {
; CHECK: load <4 x float>
- %a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 4
+ %a = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 0), align 16
%b = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 1), align 4
%c = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 2), align 4
%d = load float, float addrspace(1)* getelementptr inbounds ([4 x float], [4 x float] addrspace(1)* @global, i64 0, i64 3), align 4
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
index 13c0b3874eb..fd2ae51fc1f 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
index 9a7b294e4ce..12d882a51fa 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
@@ -17,8 +17,8 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
entry:
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
- %buff.p = load i8*, i8** %tmp1, align 8
- %buff.val = load i8, i8* %buff.p, align 8
+ %buff.p = load i8*, i8** %tmp1
+ %buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
%buff.int = load i32, i32* %tmp0, align 8
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
index 23c43863015..bf75ecf6295 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
@@ -18,11 +18,11 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
entry:
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
- %buff.p = load i8*, i8** %tmp1, align 8
- %buff.val = load i8, i8* %buff.p, align 8
+ %buff.p = load i8*, i8** %tmp1
+ %buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
- %buff.int = load i64, i64* %tmp0, align 8
+ %buff.int = load i64, i64* %tmp0, align 16
ret void
}
@@ -36,12 +36,12 @@ define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.
entry:
%nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
- %buff.p = load i8*, i8** %tmp1, align 8
- %buff.val = load i8, i8* %buff.p, align 8
+ %buff.p = load i8*, i8** %tmp1
+ %buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
%nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
- %buff.int = load i64, i64* %tmp0, align 8
+ %buff.int = load i64, i64* %tmp0, align 16
ret void
}
@@ -55,8 +55,8 @@ entry:
define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
entry:
%tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
- %buff.p = load i8*, i8** %tmp1, align 8
- %buff.val = load i8, i8* %buff.p, align 8
+ %buff.p = load i8*, i8** %tmp1
+ %buff.val = load i8, i8* %buff.p
store i8 0, i8* %buff.p, align 8
br label %"for something"
@@ -64,7 +64,7 @@ entry:
%index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]
%tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
- %buff.int = load i64, i64* %tmp0, align 8
+ %buff.int = load i64, i64* %tmp0, align 16
%index.next = add i64 %index, 8
%cmp_res = icmp eq i64 %index.next, 8
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
index cee7d9f8f9b..34ec43d1a66 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S -o - %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"