diff options
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_load-3.ll | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/widen_load-3.ll b/llvm/test/CodeGen/X86/widen_load-3.ll
index 15c06b2ba08..ad8d2324e5c 100644
--- a/llvm/test/CodeGen/X86/widen_load-3.ll
+++ b/llvm/test/CodeGen/X86/widen_load-3.ll
@@ -126,3 +126,51 @@ define <7 x i64> @load7_unaligned(<7 x i64>* %x) {
   %x1 = load <7 x i64>, <7 x i64>* %x, align 1
   ret <7 x i64> %x1
 }
+
+; PR42305 - https://bugs.llvm.org/show_bug.cgi?id=42305 - the checks below expect two 16-byte unaligned loads (offsets 0 and 16), one per stored half.
+
+define void @load_split(<8 x float>* %ld, <4 x float>* %st1, <4 x float>* %st2) { ; one <8 x float> load from %ld, halves stored to %st1 / %st2
+; X86-SSE-LABEL: load_split:
+; X86-SSE: # %bb.0:
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE-NEXT: movups (%edx), %xmm0
+; X86-SSE-NEXT: movups 16(%edx), %xmm1
+; X86-SSE-NEXT: movups %xmm0, (%ecx)
+; X86-SSE-NEXT: movups %xmm1, (%eax)
+; X86-SSE-NEXT: retl
+;
+; X86-AVX-LABEL: load_split:
+; X86-AVX: # %bb.0:
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-AVX-NEXT: vmovups (%edx), %xmm0
+; X86-AVX-NEXT: vmovups 16(%edx), %xmm1
+; X86-AVX-NEXT: vmovups %xmm0, (%ecx)
+; X86-AVX-NEXT: vmovups %xmm1, (%eax)
+; X86-AVX-NEXT: retl
+;
+; X64-SSE-LABEL: load_split:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: movups (%rdi), %xmm0
+; X64-SSE-NEXT: movups 16(%rdi), %xmm1
+; X64-SSE-NEXT: movups %xmm0, (%rsi)
+; X64-SSE-NEXT: movups %xmm1, (%rdx)
+; X64-SSE-NEXT: retq
+;
+; X64-AVX-LABEL: load_split:
+; X64-AVX: # %bb.0:
+; X64-AVX-NEXT: vmovups (%rdi), %xmm0
+; X64-AVX-NEXT: vmovups 16(%rdi), %xmm1
+; X64-AVX-NEXT: vmovups %xmm0, (%rsi)
+; X64-AVX-NEXT: vmovups %xmm1, (%rdx)
+; X64-AVX-NEXT: retq
+  %t256 = load <8 x float>, <8 x float>* %ld, align 1 ; full 8 x float (256-bit) load, declared alignment 1
+  %b128 = shufflevector <8 x float> %t256, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; elements 0-3 of the loaded vector
+  store <4 x float> %b128, <4 x float>* %st1, align 1 ; elements 0-3 go to %st1
+  %t128 = shufflevector <8 x float> %t256, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of the loaded vector
+  store <4 x float> %t128, <4 x float>* %st2, align 1 ; elements 4-7 go to %st2
+  ret void
+} |

