author    | Craig Topper <craig.topper@intel.com> | 2019-07-06 17:59:45 +0000
committer | Craig Topper <craig.topper@intel.com> | 2019-07-06 17:59:45 +0000
commit    | 913105ca42b88c87596303e0ce72c9ba0a67382b (patch)
tree      | c77f2794a24c87cabf937804a454ba3e47a1579c
parent    | 8c036bf784eb8411087fce098eac8353367349ac (diff)
download  | bcm5719-llvm-913105ca42b88c87596303e0ce72c9ba0a67382b.tar.gz, bcm5719-llvm-913105ca42b88c87596303e0ce72c9ba0a67382b.zip
[X86] Add patterns to select MOVLPDrm from MOVSD+load and MOVHPD from UNPCKL+load.
These patterns narrow the load, so we can only use them if the load
isn't volatile.
There are also tests in vector-shuffle-128-v4.ll that this should
support, but we don't seem to fold bitcast+load on pre-SSE4.2
targets due to the slow unaligned mem 16 flag.
llvm-svn: 365266
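
For illustration (not part of the commit), a minimal sketch of the two shuffle shapes the new patterns match; the function names are made up, and the shuffles mirror the shuffle_mem_v2f64_21 and shuffle_mem_v2f64_02 tests updated below:

```llvm
; Sketch only: two <2 x double> shuffles that feed X86Movsd and X86Unpckl
; nodes with a loaded operand.

define <2 x double> @movsd_plus_load(<2 x double> %a, <2 x double>* %p) {
  ; Result is <%b[0], %a[1]>, i.e. an X86Movsd of %a and the loaded vector.
  ; With a non-volatile load this can now narrow the load and select MOVLPDrm.
  %b = load <2 x double>, <2 x double>* %p
  %s = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %s
}

define <2 x double> @unpckl_plus_load(<2 x double> %a, <2 x double>* %p) {
  ; Result is <%a[0], %b[0]>, i.e. an X86Unpckl of %a and the loaded vector.
  ; With a non-volatile load this can now select MOVHPDrm instead of a
  ; full-width load plus a register shuffle.
  %b = load <2 x double>, <2 x double>* %p
  %s = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %s
}
```

Running a sketch like this through `llc -mtriple=x86_64-unknown-unknown -mattr=+sse2` should produce the folded `movlpd`/`movhpd` forms shown in the updated CHECK lines below, provided the loads are not volatile.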
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td             | 14
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll | 12

2 files changed, 18 insertions(+), 8 deletions(-)
```diff
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index dc2db1e8e61..76530adc152 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -748,6 +748,13 @@ let Predicates = [UseSSE2] in {
             (MOVLPDrm VR128:$src1, addr:$src2)>;
 }
 
+let Predicates = [UseSSE2, NoSSE41_Or_OptForSize] in {
+  // Use MOVLPD to load into the low bits from a full vector unless we can use
+  // BLENDPD.
+  def : Pat<(X86Movsd VR128:$src1, (v2f64 (nonvolatile_load addr:$src2))),
+            (MOVLPDrm VR128:$src1, addr:$src2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
 //===----------------------------------------------------------------------===//
@@ -2075,6 +2082,13 @@ let Predicates = [HasAVX1Only] in {
             (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
 }
 
+let Predicates = [UseSSE2] in {
+  // Use MOVHPD if the load isn't aligned enough for UNPCKLPD.
+  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
+                    (v2f64 (nonvolatile_load addr:$src2)))),
+            (MOVHPDrm VR128:$src1, addr:$src2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Extract Floating-Point Sign mask
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 761855e5c62..2b42d33000a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -1309,8 +1309,7 @@ define <2 x double> @shuffle_mem_v2f64_31(<2 x double> %a, <2 x double>* %b) {
 define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) {
 ; SSE-LABEL: shuffle_mem_v2f64_02:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movups (%rdi), %xmm1
-; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_mem_v2f64_02:
@@ -1325,20 +1324,17 @@ define <2 x double> @shuffle_mem_v2f64_02(<2 x double> %a, <2 x double>* %pb) {
 define <2 x double> @shuffle_mem_v2f64_21(<2 x double> %a, <2 x double>* %pb) {
 ; SSE2-LABEL: shuffle_mem_v2f64_21:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movupd (%rdi), %xmm1
-; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
 ; SSE2-NEXT:    retq
 ;
 ; SSE3-LABEL: shuffle_mem_v2f64_21:
 ; SSE3:       # %bb.0:
-; SSE3-NEXT:    movupd (%rdi), %xmm1
-; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
 ; SSE3-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_mem_v2f64_21:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movupd (%rdi), %xmm1
-; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSSE3-NEXT:    movlpd {{.*#+}} xmm0 = mem[0],xmm0[1]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_mem_v2f64_21:
```