From e4a89a646213718c86da9f81cc8b91030e84c4c7 Mon Sep 17 00:00:00 2001 From: Farhana Aleen Date: Fri, 21 Jul 2017 21:35:00 +0000 Subject: X86InterleaveAccess: A fix for bug33826 Reviewers: DavidKreitzer Differential Revision: https://reviews.llvm.org/D35638 llvm-svn: 308784 --- .../X86/interleaved-accesses-64bits-avx.ll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'llvm/test/Transforms/InterleavedAccess') diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll index dcef57fc625..1a48be2e7cd 100644 --- a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll @@ -217,3 +217,20 @@ define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double> store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16 ret void } + +; This verifies that the test passes without hitting any assertions. +; Today, X86InterleavedAccess could be extended to handle this case and +; generate a transposed sequence, which would require creating dummy +; vectors of undef. However, it deliberately does not optimize cases where +; the load size is less than Factor * NumberOfElements, because a better +; sequence can easily be generated by CG. + +@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32 +; Function Attrs: norecurse nounwind readonly uwtable +define <4 x double> @test_unhandled(<4 x double> %b) { +entry: + %0 = load <4 x double>, <4 x double>* @a, align 32 + %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> + %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> + ret <4 x double> %shuffle +} -- cgit v1.2.3