diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 66 |
1 files changed, 33 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 36877ac13bc..9485b02735b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10515,26 +10515,6 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(Mask[0] < 2 && "We sort V1 to be the first input."); assert(Mask[1] >= 2 && "We sort V2 to be the second input."); - // If we have a blend of two same-type PACKUS operations and the blend aligns - // with the low and high halves, we can just merge the PACKUS operations. - // This is particularly important as it lets us merge shuffles that this - // routine itself creates. - auto GetPackNode = [](SDValue V) { - V = peekThroughBitcasts(V); - return V.getOpcode() == X86ISD::PACKUS ? V : SDValue(); - }; - if (SDValue V1Pack = GetPackNode(V1)) - if (SDValue V2Pack = GetPackNode(V2)) { - EVT PackVT = V1Pack.getValueType(); - if (PackVT == V2Pack.getValueType()) - return DAG.getBitcast(MVT::v2i64, - DAG.getNode(X86ISD::PACKUS, DL, PackVT, - Mask[0] == 0 ? V1Pack.getOperand(0) - : V1Pack.getOperand(1), - Mask[1] == 2 ? V2Pack.getOperand(0) - : V2Pack.getOperand(1))); - } - // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -28803,8 +28783,37 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SDLoc DL(N); MVT VT = N.getSimpleValueType(); SmallVector<int, 4> Mask; - unsigned Opcode = N.getOpcode(); + + // Combine binary shuffle of 2 similar 'Horizontal' instructions into a + // single instruction. + if (VT.getScalarSizeInBits() == 64 && + (Opcode == X86ISD::MOVSD || Opcode == X86ISD::UNPCKH || + Opcode == X86ISD::UNPCKL)) { + auto BC0 = peekThroughBitcasts(N.getOperand(0)); + auto BC1 = peekThroughBitcasts(N.getOperand(1)); + EVT VT0 = BC0.getValueType(); + EVT VT1 = BC1.getValueType(); + unsigned Opcode0 = BC0.getOpcode(); + unsigned Opcode1 = BC1.getOpcode(); + if (Opcode0 == Opcode1 && VT0 == VT1 && + (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD || + Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB || + Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) { + SDValue Lo, Hi; + if (Opcode == X86ISD::MOVSD) { + Lo = BC1.getOperand(0); + Hi = BC0.getOperand(1); + } else { + Lo = BC0.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0); + Hi = BC1.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0); + } + SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi); + DCI.AddToWorklist(Horiz.getNode()); + return DAG.getBitcast(VT, Horiz); + } + } + switch (Opcode) { case X86ISD::PSHUFD: case X86ISD::PSHUFLW: @@ -28813,17 +28822,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, assert(Mask.size() == 4); break; case X86ISD::UNPCKL: { - auto Op0 = N.getOperand(0); - auto Op1 = N.getOperand(1); - unsigned Opcode0 = Op0.getOpcode(); - unsigned Opcode1 = Op1.getOpcode(); - - // Combine X86ISD::UNPCKL with 2 X86ISD::FHADD inputs into a single - // X86ISD::FHADD. This is generated by UINT_TO_FP v2f64 scalarization. - // TODO: Add other horizontal operations as required. - if (VT == MVT::v2f64 && Opcode0 == Opcode1 && Opcode0 == X86ISD::FHADD) - return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0)); - // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE // moves upper half elements into the lower half part. For example: @@ -28841,7 +28839,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (!VT.is128BitVector()) return SDValue(); - if (Op0.isUndef() && Opcode1 == ISD::VECTOR_SHUFFLE) { + auto Op0 = N.getOperand(0); + auto Op1 = N.getOperand(1); + if (Op0.isUndef() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE) { ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask(); unsigned NumElts = VT.getVectorNumElements(); |