From 86db068e39ae631f408d6edaaf48dcc423a5311e Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Thu, 26 Oct 2017 13:59:15 +0000 Subject: [LSV] Avoid adding vectors of pointers as candidates Summary: We no longer add vectors of pointers as candidates for load/store vectorization. It does not seem to work anyway, but without this patch we can hit assertion failures when trying to create casts between an integer type and a vector of pointers type. The test case I've added used to assert like this when trying to cast between i64 and <2 x i16*>: opt: ../lib/IR/Instructions.cpp:2565: Assertion `castIsValid(op, S, Ty) && "Invalid cast!"' failed. #0 PrintStackTraceSignalHandler(void*) #1 SignalHandler(int) #2 __restore_rt #3 __GI_raise #4 __GI_abort #5 __GI___assert_fail #6 llvm::CastInst::Create(llvm::Instruction::CastOps, llvm::Value*, llvm::Type*, llvm::Twine const&, llvm::Instruction*) #7 llvm::IRBuilder::CreateBitOrPointerCast(llvm::Value*, llvm::Type*, llvm::Twine const&) #8 Vectorizer::vectorizeStoreChain(llvm::ArrayRef, llvm::SmallPtrSet*) Reviewers: arsenm Reviewed By: arsenm Subscribers: nhaehnle, llvm-commits Differential Revision: https://reviews.llvm.org/D39296 llvm-svn: 316665 --- .../LoadStoreVectorizer/AMDGPU/merge-stores.ll | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'llvm/test/Transforms/LoadStoreVectorizer') diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll index dbb7068eeae..19fc44bb6c8 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -632,6 +632,26 @@ define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias ret void } +; Verify that we no longer hit asserts for this test case. No change expected. 
+; CHECK-LABEL: @copy_vec_of_ptrs +; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1 +; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1 +; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4 +; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1 +; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1 +; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4 +define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out, + <2 x i16*> addrspace(1)* %in ) #0 { + %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1 + %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1 + %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4 + + %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1 + store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1 + store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4 + ret void +} + declare void @llvm.amdgcn.s.barrier() #1 attributes #0 = { nounwind } -- cgit v1.2.3