author    | Sanjay Patel <spatel@rotateright.com> | 2015-06-18 21:34:26 +0000
committer | Sanjay Patel <spatel@rotateright.com> | 2015-06-18 21:34:26 +0000
commit    | c3e018e6fd3b4c9212b4a01ce245ec18118f781b (patch)
tree      | 6f687af5511bedcf6cf82eb936d2479e54e91268 /llvm
parent    | de94fa64382c02c40a98ed6428c0dae1989cb513 (diff)
add test to show suboptimal load merging behavior
llvm-svn: 240063
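For context, the new test exercises the backend combine that merges two consecutive 16-byte loads feeding a shufflevector into a single 32-byte load. Because the first load carries 32-byte alignment, even the AVXSLOW configuration should be able to emit one aligned 32-byte load. A hypothetical check pattern for the fixed AVXSLOW codegen (a sketch inferred from the AVXFAST/AVX2 output in the diff below, not part of this commit) would be:

; Hypothetical AVXSLOW checks if the FIXME were resolved: a single aligned
; 32-byte load replaces the vmovaps (xmm) + vinsertf128 pair.
; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
; AVXSLOW:       # BB#0:
; AVXSLOW-NEXT:    vmovaps 48(%rdi), %ymm0
; AVXSLOW-NEXT:    retq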
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll b/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
index aeac2e1fe0a..d979c16f4ab 100644
--- a/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ b/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -75,6 +75,31 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
   ret <8 x float> %v3
 }
 
+define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
+;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
+; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
+; AVXSLOW:       # BB#0:
+; AVXSLOW-NEXT:    vmovaps 48(%rdi), %xmm0
+; AVXSLOW-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT:    retq
+;
+; AVXFAST-LABEL: combine_16_byte_loads_aligned:
+; AVXFAST:       # BB#0:
+; AVXFAST-NEXT:    vmovaps 48(%rdi), %ymm0
+; AVXFAST-NEXT:    retq
+;
+; AVX2-LABEL: combine_16_byte_loads_aligned:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps 48(%rdi), %ymm0
+; AVX2-NEXT:    retq
+  %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
+  %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
+  %v1 = load <4 x float>, <4 x float>* %ptr1, align 32
+  %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
+  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %v3
+}
+
 ; Swap the order of the shufflevector operands to ensure that the pattern still matches.
 define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) {
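The hunk does not show the file's RUN lines, which define the AVXSLOW/AVXFAST/AVX2 check prefixes. As a rough sketch of how such a test is driven (the triple and the -mattr/-mcpu choices here are assumptions for illustration, not taken from the file):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=AVXSLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=AVXFAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2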