diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-06-06 14:18:39 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-06-06 14:18:39 +0000 |
commit | f7113fd2704af1913471f812e595e463baada3d5 (patch) | |
tree | ec8c8db5a5903bfa5e083379d3242c7e7a35fc5c /llvm/test | |
parent | 8cd60a506781db87bd996768b69fef8865d0f2a6 (diff) | |
download | bcm5719-llvm-f7113fd2704af1913471f812e595e463baada3d5.tar.gz bcm5719-llvm-f7113fd2704af1913471f812e595e463baada3d5.zip |
[X86][AVX1] Split 256-bit vector non-temporal FastISel loads to keep it non-temporal (PR32744)
Extension to D33728
llvm-svn: 304798
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/X86/fast-isel-nontemporal.ll | 36 |
1 file changed, 30 insertions, 6 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
index 306012aa3bf..33d001cdc21 100644
--- a/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt8xfloat:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt8xfloat:
@@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt4xdouble:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt4xdouble:
@@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt32xi8:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt32xi8:
@@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt16xi16:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt16xi16:
@@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt8xi32:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt8xi32:
@@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
 ;
 ; AVX1-LABEL: test_load_nt4xi64:
 ; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_load_nt4xi64:
```