| author | Craig Topper <craig.topper@intel.com> | 2018-07-11 18:09:04 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-11 18:09:04 +0000 |
| commit | 38b290f7d72f67f9daca7d8ab3e32ffd90689cb8 (patch) | |
| tree | 7b7b22590be2526b74fa9a6f07e1276d47686e35 /llvm/test | |
| parent | 667a5b541fac4930898b2a75541781f4201f106d (diff) | |
| download | bcm5719-llvm-38b290f7d72f67f9daca7d8ab3e32ffd90689cb8.tar.gz bcm5719-llvm-38b290f7d72f67f9daca7d8ab3e32ffd90689cb8.zip | |
[X86] Remove patterns for inserting a load into a zero vector.
We can instead block the load folding in isProfitableToFold. Then isel will emit a register->register move for the zeroing part and a separate load, and the PostprocessISelDAG hook should be able to remove the register->register move.
This saves patterns and fixes the fact that we previously only had unaligned load patterns. The test changes show places where we should have been using an aligned load.
llvm-svn: 336828
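
For context, here is a minimal sketch of the kind of check the message describes, as it might sit inside `X86DAGToDAGISel::IsProfitableToFold` in `X86ISelDAGToDAG.cpp`. This is an illustration of the approach, not the actual patch: the opcode and operand-position checks below are assumptions.

```cpp
// Sketch only -- hypothetical excerpt, not the committed code.
// Refuse to fold the load when it would become the subvector operand of an
// insert into an all-zeros vector. Isel then selects a plain load (aligned
// when the alignment is known) plus a register->register move for the
// zeroing, which PostprocessISelDAG can later delete.
bool X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
                                         SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None)
    return false;

  // ... existing profitability checks elided ...

  // N is the candidate load; U is the node it would be folded into.
  // For ISD::INSERT_SUBVECTOR, operand 0 is the destination vector and
  // operand 1 the inserted subvector (positions assumed for illustration).
  if (U->getOpcode() == ISD::INSERT_SUBVECTOR &&
      ISD::isBuildVectorAllZeros(U->getOperand(0).getNode()) &&
      U->getOperand(1) == N)
    return false;

  return true;
}
```

Blocking the fold in one place, rather than writing a pattern per type and alignment combination, lets the ordinary load selection pick `vmovaps` over `vmovups` whenever the alignment is known, which is exactly what the test diff below shows.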
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll | 8 |
1 file changed, 4 insertions, 4 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
index 73abda48b31..f421d41f886 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
 define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
 ; AVX-LABEL: merge_4f64_2f64_2z:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovups 32(%rdi), %xmm0
+; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X32-AVX-LABEL: merge_4f64_2f64_2z:
 ; X32-AVX:       # %bb.0:
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
+; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
 ; X32-AVX-NEXT:    retl
   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
   %val0 = load <2 x double>, <2 x double>* %ptr0
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
 define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
 ; AVX-LABEL: merge_4i64_2i64_3z:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovups 48(%rdi), %xmm0
+; AVX-NEXT:    vmovaps 48(%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
 ; X32-AVX-LABEL: merge_4i64_2i64_3z:
 ; X32-AVX:       # %bb.0:
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT:    vmovups 48(%eax), %xmm0
+; X32-AVX-NEXT:    vmovaps 48(%eax), %xmm0
 ; X32-AVX-NEXT:    retl
   %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
   %val0 = load <2 x i64>, <2 x i64>* %ptr0
```

