| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-10-01 15:33:01 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-10-01 15:33:01 +0000 |
| commit | ae17cf20ce6bd2fb9b7660fa124859916ae1c74a | |
| tree | 54330987fb8ec1d8be491551502b7b17f1bafbc2 /llvm/lib/Target | |
| parent | ccdd1ff49b2cebd46eb3b3972680cc07abff8a5e | |
[X86][SSE] Always combine target shuffles to MOVSD/MOVSS
Now that we can commute to BLENDPD/BLENDPS on SSE41+ targets if necessary, simplify the combine matching where we can.
This required me to add a couple of scalar math movsd/movss fold patterns that hadn't been needed in the past.
llvm-svn: 283038
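
To make the lowering choice concrete, here is a minimal standalone sketch, not part of the commit: the values and the `main` harness are illustrative, and it assumes a compiler with SSE4.1 support (e.g. `-msse4.1`). It shows why the `{0, 3}` v2f64 shuffle can always lower to MOVSD after swapping the operands, and why BLENDPD remains reachable on SSE41+ by flipping the blend immediate instead:

```cpp
#include <immintrin.h>
#include <cstdio>

int main() {
  __m128d v1 = _mm_set_pd(11.0, 10.0); // v1 = {10.0, 11.0}
  __m128d v2 = _mm_set_pd(21.0, 20.0); // v2 = {20.0, 21.0}

  // Target shuffle {0, 3} over (v1, v2): result = {v1[0], v2[1]}.
  // MOVSD takes its low element from the *second* operand, so the
  // operands must be swapped -- the std::swap(V1, V2) in the patch.
  __m128d via_movsd = _mm_move_sd(v2, v1);      // {10.0, 21.0}

  // BLENDPD is commutable: flipping the immediate handles either
  // operand order, so SSE41+ no longer has to avoid the MOVSD match.
  __m128d via_blend = _mm_blend_pd(v1, v2, 2);  // imm 0b10 -> {10.0, 21.0}
  __m128d commuted  = _mm_blend_pd(v2, v1, 1);  // imm 0b01 -> same result

  double a[2], b[2], c[2];
  _mm_storeu_pd(a, via_movsd);
  _mm_storeu_pd(b, via_blend);
  _mm_storeu_pd(c, commuted);
  printf("movsd {%g,%g}  blendpd {%g,%g}  commuted {%g,%g}\n",
         a[0], a[1], b[0], b[1], c[0], c[1]);
  return 0;
}
```

The same reasoning applies to the `{4, 1, 2, 3}` v4f32 case and MOVSS/BLENDPS, except no operand swap is needed there.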
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 12 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 7 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 10 |
3 files changed, 19 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1600230cc9..6c8def1c397 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25094,8 +25094,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     return true;
   }
   if (isTargetShuffleEquivalent(Mask, {0, 3}) && FloatDomain) {
-    // On SSE41 targets use BLENDPD (its commutable).
-    if (Subtarget.hasSSE2() && !Subtarget.hasSSE41()) {
+    if (Subtarget.hasSSE2()) {
       std::swap(V1, V2);
       Shuffle = X86ISD::MOVSD;
       ShuffleVT = MVT::v2f64;
@@ -25103,12 +25102,9 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
   if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && FloatDomain) {
-    // On SSE41 targets use BLENDPS (its commutable).
-    if (!Subtarget.hasSSE41()) {
-      Shuffle = X86ISD::MOVSS;
-      ShuffleVT = MVT::v4f32;
-      return true;
-    }
+    Shuffle = X86ISD::MOVSS;
+    ShuffleVT = MVT::v4f32;
+    return true;
   }
   if (isTargetShuffleEquivalent(Mask, {0, 0, 1, 1}) && FloatDomain) {
     V2 = V1;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 950fbc5e402..30437afb15b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8535,6 +8535,13 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
 // patterns we have to try to match.
 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
   let Predicates = [HasAVX512] in {
+    // extracted scalar math op with insert via movss
+    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+          (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
+          FR32:$src))))),
+      (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
+          (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
     // extracted scalar math op with insert via blend
     def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
           (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 056479c7124..afbb1d3a173 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3217,9 +3217,15 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
 
   }
 
-  // Repeat everything for AVX, except for the movss + scalar combo...
-  // because that one shouldn't occur with AVX codegen?
+  // Repeat everything for AVX.
   let Predicates = [UseAVX] in {
+    // extracted scalar math op with insert via movss
+    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+          (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
+          FR32:$src))))),
+      (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
+          (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
     // extracted scalar math op with insert via blend
     def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
           (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
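
For context on the new movss fold patterns above, a rough sketch (not from the commit; `add_to_lane0` is a hypothetical helper) of the DAG shape they match: a scalar f32 op applied to element 0 and reinserted over the source vector via a MOVSS-style move, which the `_Int` patterns let the backend collapse into a single scalar math instruction such as ADDSS:

```cpp
#include <immintrin.h>

// Hypothetical helper: extract lane 0, do scalar math, reinsert via
// movss. This is the shape (X86Movss dst, (scalar_to_vector (Op
// (extractelt dst, 0), src))), which the new Pat<>s can now select
// to a single (V)ADDSS on the full vector.
__m128 add_to_lane0(__m128 v, float s) {
  float lo = _mm_cvtss_f32(v);     // (extractelt (v4f32 v), (iPTR 0))
  __m128 op = _mm_set_ss(lo + s);  // (scalar_to_vector (fadd lo, s))
  return _mm_move_ss(v, op);       // (X86Movss v, op)
}
```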

