summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoadStoreVectorizer
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-09-24 13:18:15 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-09-24 13:18:15 +0000
commitf432011d33e394788d717809ad8955763efe4d9d (patch)
tree3ad9a68b7121eb430e0af1c652c178f63bfcbbce /llvm/test/Transforms/LoadStoreVectorizer
parentb53feca372725647f7f3eaddee238629616eac4f (diff)
downloadbcm5719-llvm-f432011d33e394788d717809ad8955763efe4d9d.tar.gz
bcm5719-llvm-f432011d33e394788d717809ad8955763efe4d9d.zip
AMDGPU: Fix private handling for allowsMisalignedMemoryAccesses
If the alignment is at least 4, this should report true. Something still seems off with how < 4-byte types are handled here though. Fixing this seems to change how some combines get to where they get, but somehow isn't changing the net result. llvm-svn: 342879
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer')
-rw-r--r-- llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll | 26
1 file changed, 9 insertions(+), 17 deletions(-)
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
index a44c1321fd9..4292cbcec85 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -8,13 +8,13 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
-; ALIGNED: store i32
-; ALIGNED: store i32
-; ALIGNED: store i32
-; ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
-; ELT8-UNALIGNED: store <2 x i32>
-; ELT8-UNALIGNED: store <2 x i32>
+; ELT8: store <2 x i32>
+; ELT8: store <2 x i32>
; ELT16-UNALIGNED: store <4 x i32>
define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
@@ -167,18 +167,10 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(
; ELT4: store i32
; ELT4: store i32
-; ELT8-ALIGNED: store i32
-; ELT8-ALIGNED: store i32
-; ELT8-ALIGNED: store i32
+; ELT8: store <2 x i32>
+; ELT8: store i32
-; ELT8-UNALIGNED: store <2 x i32>
-; ELT8-UNALIGNED: store i32
-
-; ELT16-ALIGNED: store i32
-; ELT16-ALIGNED: store i32
-; ELT16-ALIGNED: store i32
-
-; ELT16-UNALIGNED: store <3 x i32>
+; ELT16: store <3 x i32>
define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
OpenPOWER on IntegriCloud