diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 10 |
2 files changed, 19 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cb27a43558f..9089d138ddc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10252,16 +10252,17 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask); } - // For integer vector shuffles, try to collapse them into a shuffle of fewer - // lanes but wider integers. We cap this to not form integers larger than i64 - // but it might be interesting to form i128 integers to handle flipping the - // low and high halves of AVX 256-bit vectors. + // Try to collapse shuffles into using a vector type with fewer elements but + // wider element types. We cap this to not form integers or floating point + // elements wider than 64 bits, but it might be interesting to form i128 + // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector<int, 16> WidenedMask; - if (VT.isInteger() && VT.getScalarSizeInBits() < 64 && + if (VT.getScalarSizeInBits() < 64 && canWidenShuffleElements(Mask, WidenedMask)) { - MVT NewVT = - MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2), - VT.getVectorNumElements() / 2); + MVT NewEltVT = VT.isFloatingPoint() + ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) + : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); + MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2); V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); return DAG.getNode(ISD::BITCAST, dl, VT, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 486beddb4a9..f833d043027 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1378,6 +1378,11 @@ let Predicates = [HasAVX] in { def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (VMOVHPDrm VR128:$src1, addr:$src2)>; + // Also handle an i64 load because that may get selected as a faster way to + // load the data. + def : Pat<(v2f64 (X86Unpckl VR128:$src1, + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), + (VMOVHPDrm VR128:$src1, addr:$src2)>; } let Predicates = [UseSSE1] in { @@ -1398,6 +1403,11 @@ let Predicates = [UseSSE2] in { def : Pat<(v2f64 (X86Unpckl VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; + // Also handle an i64 load because that may get selected as a faster way to + // load the data. + def : Pat<(v2f64 (X86Unpckl VR128:$src1, + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), + (MOVHPDrm VR128:$src1, addr:$src2)>; } //===----------------------------------------------------------------------===// |