diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 13 |
2 files changed, 17 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 37338ab5796..7a32ce59d31 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29387,14 +29387,14 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
     if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
       V2 = V1;
       V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1);
-      Shuffle = X86ISD::MOVLHPS;
-      SrcVT = DstVT = MVT::v4f32;
+      Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS;
+      SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
       return true;
     }
     if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
       V2 = V1;
-      Shuffle = X86ISD::MOVHLPS;
-      SrcVT = DstVT = MVT::v4f32;
+      Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS;
+      SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32;
       return true;
     }
     if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c8ad7d9eabb..af40b009d97 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -862,6 +862,19 @@ let Constraints = "$src1 = $dst" in {
                       Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
 }
 
+// TODO: This is largely to trick fastisel into ignoring the pattern.
+def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
+                          (X86Unpckh node:$src1, node:$src2), [{
+  return N->getOperand(0) == N->getOperand(1);
+}]>;
+
+let Predicates = [UseSSE2] in {
+  // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
+  // movhlps for sse2 without changing a bunch of tests.
+  def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
+            (MOVHLPSrr VR128:$src, VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Conversion Instructions
 //===----------------------------------------------------------------------===//

