diff options
author | Roman Tereshin <rtereshin@apple.com> | 2018-07-20 20:10:04 +0000 |
---|---|---|
committer | Roman Tereshin <rtereshin@apple.com> | 2018-07-20 20:10:04 +0000 |
commit | 31d52847ef77c7d01798090ac1f28befcc2c7f8c (patch) | |
tree | be66a02f9811c6702bc832af34f58d269a8fa66d /llvm/test/Transforms/LoadStoreVectorizer/AMDGPU | |
parent | 6fc21c252289e62cf58db37e3efb39a3f3007a0e (diff) | |
download | bcm5719-llvm-31d52847ef77c7d01798090ac1f28befcc2c7f8c.tar.gz bcm5719-llvm-31d52847ef77c7d01798090ac1f28befcc2c7f8c.zip |
Reapply "[LSV] Refactoring + supporting bitcasts to a type of different size"
This reapplies commit r337489, which was reverted by r337541.
Additionally, this commit contains a speculative fix for the issue reported in r337541
(the report does not contain an actionable reproducer, just a stack trace).
llvm-svn: 337606
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer/AMDGPU')
-rw-r--r-- | llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll | 53 |
1 files changed, 51 insertions, 2 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll index b67dc058453..4de8b0fd7c6 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/gep-bitcast.ll @@ -56,8 +56,8 @@ define void @vect_zext_bitcast_i8_st1_to_i32_idx(i8 addrspace(1)* %arg1, i32 %ba ret void } -; TODO: This can be vectorized, but currently vectorizer unable to do it. ; CHECK-LABEL: @vect_zext_bitcast_i8_st4_to_i32_idx +; CHECK: load <4 x i32> define void @vect_zext_bitcast_i8_st4_to_i32_idx(i8 addrspace(1)* %arg1, i32 %base) { %add1 = add nuw i32 %base, 0 %zext1 = zext i32 %add1 to i64 @@ -74,10 +74,59 @@ define void @vect_zext_bitcast_i8_st4_to_i32_idx(i8 addrspace(1)* %arg1, i32 %ba %gep3 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %zext3 %f2i3 = bitcast i8 addrspace(1)* %gep3 to i32 addrspace(1)* %load3 = load i32, i32 addrspace(1)* %f2i3, align 4 - %add4 = add nuw i32 %base, 16 + %add4 = add nuw i32 %base, 12 %zext4 = zext i32 %add4 to i64 %gep4 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %zext4 %f2i4 = bitcast i8 addrspace(1)* %gep4 to i32 addrspace(1)* %load4 = load i32, i32 addrspace(1)* %f2i4, align 4 ret void } + +; CHECK-LABEL: @vect_zext_bitcast_negative_ptr_delta +; CHECK: load <2 x i32> +define void @vect_zext_bitcast_negative_ptr_delta(i32 addrspace(1)* %p, i32 %base) { + %p.bitcasted = bitcast i32 addrspace(1)* %p to i16 addrspace(1)* + %a.offset = add nuw i32 %base, 4 + %t.offset.zexted = zext i32 %base to i64 + %a.offset.zexted = zext i32 %a.offset to i64 + %t.ptr = getelementptr inbounds i16, i16 addrspace(1)* %p.bitcasted, i64 %t.offset.zexted + %a.ptr = getelementptr inbounds i16, i16 addrspace(1)* %p.bitcasted, i64 %a.offset.zexted + %b.ptr = getelementptr inbounds i16, i16 addrspace(1)* %t.ptr, i64 6 + %a.ptr.bitcasted = bitcast i16 addrspace(1)* %a.ptr to i32 addrspace(1)* + 
%b.ptr.bitcasted = bitcast i16 addrspace(1)* %b.ptr to i32 addrspace(1)* + %a.val = load i32, i32 addrspace(1)* %a.ptr.bitcasted + %b.val = load i32, i32 addrspace(1)* %b.ptr.bitcasted + ret void +} + +; Check i1 corner case +; CHECK-LABEL: @zexted_i1_gep_index +; CHECK: load i32 +; CHECK: load i32 +define void @zexted_i1_gep_index(i32 addrspace(1)* %p, i32 %val) { + %selector = icmp eq i32 %val, 0 + %flipped = xor i1 %selector, 1 + %index.0 = zext i1 %selector to i64 + %index.1 = zext i1 %flipped to i64 + %gep.0 = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %index.0 + %gep.1 = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %index.1 + %val0 = load i32, i32 addrspace(1)* %gep.0 + %val1 = load i32, i32 addrspace(1)* %gep.1 + ret void +} + +; Check i1 corner case +; CHECK-LABEL: @sexted_i1_gep_index +; CHECK: load i32 +; CHECK: load i32 +define void @sexted_i1_gep_index(i32 addrspace(1)* %p, i32 %val) { + %selector = icmp eq i32 %val, 0 + %flipped = xor i1 %selector, 1 + %index.0 = sext i1 %selector to i64 + %index.1 = sext i1 %flipped to i64 + %gep.0 = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %index.0 + %gep.1 = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 %index.1 + %val0 = load i32, i32 addrspace(1)* %gep.0 + %val1 = load i32, i32 addrspace(1)* %gep.1 + ret void +} |