summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/InterleavedAccess
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms/InterleavedAccess')
-rw-r--r--llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll17
1 files changed, 17 insertions, 0 deletions
diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
index dcef57fc625..1a48be2e7cd 100644
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
@@ -217,3 +217,20 @@ define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double>
store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
ret void
}
+
+; This verifies whether the test passes and does not hit any assertions.
+; Today, X86InterleavedAccess could have handled this case and
+; generate transposed sequence by extending the current implementation
+; which would be creating dummy vectors of undef. But it decided not to
+; optimize these cases where the load-size is less than Factor * NumberOfElements.
+; Because a better sequence can easily be generated by CG.
+
+@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
+; Function Attrs: norecurse nounwind readonly uwtable
+define <4 x double> @test_unhandled(<4 x double> %b) {
+entry:
+ %0 = load <4 x double>, <4 x double>* @a, align 32
+ %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
+ %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
+ ret <4 x double> %shuffle
+}
OpenPOWER on IntegriCloud