From f432011d33e394788d717809ad8955763efe4d9d Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 24 Sep 2018 13:18:15 +0000
Subject: AMDGPU: Fix private handling for allowsMisalignedMemoryAccesses

If the alignment is at least 4, this should report true.

Something still seems off with how < 4-byte types are handled here though.

Fixing this seems to change how some combines get to where they get, but
somehow isn't changing the net result.

llvm-svn: 342879
---
 .../AMDGPU/merge-stores-private.ll | 26 ++++++++--------------
 1 file changed, 9 insertions(+), 17 deletions(-)

(limited to 'llvm/test/Transforms/LoadStoreVectorizer')

diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
index a44c1321fd9..4292cbcec85 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -8,13 +8,13 @@
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
 ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
-; ALIGNED: store i32
-; ALIGNED: store i32
-; ALIGNED: store i32
-; ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
+; ELT4-ALIGNED: store i32
 
-; ELT8-UNALIGNED: store <2 x i32>
-; ELT8-UNALIGNED: store <2 x i32>
+; ELT8: store <2 x i32>
+; ELT8: store <2 x i32>
 
 ; ELT16-UNALIGNED: store <4 x i32>
 define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
@@ -167,18 +167,10 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(
 ; ELT4: store i32
 ; ELT4: store i32
 
-; ELT8-ALIGNED: store i32
-; ELT8-ALIGNED: store i32
-; ELT8-ALIGNED: store i32
+; ELT8: store <2 x i32>
+; ELT8: store i32
 
-; ELT8-UNALIGNED: store <2 x i32>
-; ELT8-UNALIGNED: store i32
-
-; ELT16-ALIGNED: store i32
-; ELT16-ALIGNED: store i32
-; ELT16-ALIGNED: store i32
-
-; ELT16-UNALIGNED: store <3 x i32>
+; ELT16: store <3 x i32>
 define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
   %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
   %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
-- 
cgit v1.2.3