diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-09-24 13:18:15 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-09-24 13:18:15 +0000 |
commit | f432011d33e394788d717809ad8955763efe4d9d (patch) | |
tree | 3ad9a68b7121eb430e0af1c652c178f63bfcbbce /llvm/test/Transforms/LoadStoreVectorizer | |
parent | b53feca372725647f7f3eaddee238629616eac4f (diff) | |
download | bcm5719-llvm-f432011d33e394788d717809ad8955763efe4d9d.tar.gz bcm5719-llvm-f432011d33e394788d717809ad8955763efe4d9d.zip |
AMDGPU: Fix private handling for allowsMisalignedMemoryAccesses
If the alignment is at least 4, this should report true.
Something still seems off with how < 4-byte types are
handled here though.
Fixing this seems to change the path some combines take
to reach their final form, but somehow does not change the net
result.
llvm-svn: 342879
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer')
-rw-r--r-- | llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll | 26 |
1 file changed, 9 insertions, 17 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
index a44c1321fd9..4292cbcec85 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -8,13 +8,13 @@
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
 ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
-; ALIGNED: store i32
-; ALIGNED: store i32
-; ALIGNED: store i32
-; ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
 
-; ELT8-UNALIGNED: store <2 x i32>
-; ELT8-UNALIGNED: store <2 x i32>
+; ELT8: store <2 x i32>
+; ELT8: store <2 x i32>
 
 ; ELT16-UNALIGNED: store <4 x i32>
 define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
@@ -167,18 +167,10 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(
 ; ELT4: store i32
 ; ELT4: store i32
 
-; ELT8-ALIGNED: store i32
-; ELT8-ALIGNED: store i32
-; ELT8-ALIGNED: store i32
+; ELT8: store <2 x i32>
+; ELT8: store i32
 
-; ELT8-UNALIGNED: store <2 x i32>
-; ELT8-UNALIGNED: store i32
-
-; ELT16-ALIGNED: store i32
-; ELT16-ALIGNED: store i32
-; ELT16-ALIGNED: store i32
-
-; ELT16-UNALIGNED: store <3 x i32>
+; ELT16: store <3 x i32>
 define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
   %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
   %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2