diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 69 |
1 files changed, 22 insertions, 47 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e59da306dae..b96794876d2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23441,7 +23441,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::MUL, dl, VT, R, Scale); // If possible, lower this shift as a sequence of two shifts by - // constant plus a MOVSS/MOVSD/PBLEND instead of scalarizing it. + // constant plus a BLENDing shuffle instead of scalarizing it. // Example: // (v4i32 (srl A, (build_vector < X, Y, Y, Y>))) // @@ -23449,64 +23449,39 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, // (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>))) // // The advantage is that the two shifts from the example would be - // lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing - // the vector shift into four scalar shifts plus four pairs of vector - // insert/extract. + // lowered as X86ISD::VSRLI nodes in parallel before blending. if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) { - bool UseMOVSD = false; - bool CanBeSimplified; - // The splat value for the first packed shift (the 'X' from the example). - SDValue Amt1 = Amt->getOperand(0); - // The splat value for the second packed shift (the 'Y' from the example). - SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : Amt->getOperand(2); - - // See if it is possible to replace this node with a sequence of - // two shifts followed by a MOVSS/MOVSD/PBLEND. - if (VT == MVT::v4i32) { - // Check if it is legal to use a MOVSS. - CanBeSimplified = Amt2 == Amt->getOperand(2) && - Amt2 == Amt->getOperand(3); - if (!CanBeSimplified) { - // Otherwise, check if we can still simplify this node using a MOVSD. - CanBeSimplified = Amt1 == Amt->getOperand(1) && - Amt->getOperand(2) == Amt->getOperand(3); - UseMOVSD = true; - Amt2 = Amt->getOperand(2); + SDValue Amt1, Amt2; + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<int, 8> ShuffleMask; + for (unsigned i = 0; i != NumElts; ++i) { + SDValue A = Amt->getOperand(i); + if (A.isUndef()) { + ShuffleMask.push_back(SM_SentinelUndef); + continue; } - } else { - // Do similar checks for the case where the machine value type - // is MVT::v8i16. - CanBeSimplified = Amt1 == Amt->getOperand(1); - for (unsigned i=3; i != 8 && CanBeSimplified; ++i) - CanBeSimplified = Amt2 == Amt->getOperand(i); - - if (!CanBeSimplified) { - UseMOVSD = true; - CanBeSimplified = true; - Amt2 = Amt->getOperand(4); - for (unsigned i=0; i != 4 && CanBeSimplified; ++i) - CanBeSimplified = Amt1 == Amt->getOperand(i); - for (unsigned j=4; j != 8 && CanBeSimplified; ++j) - CanBeSimplified = Amt2 == Amt->getOperand(j); + if (!Amt1 || Amt1 == A) { + ShuffleMask.push_back(i); + Amt1 = A; + continue; + } + if (!Amt2 || Amt2 == A) { + ShuffleMask.push_back(i + NumElts); + Amt2 = A; + continue; } + break; } - if (CanBeSimplified && isa<ConstantSDNode>(Amt1) && + if (ShuffleMask.size() == NumElts && isa<ConstantSDNode>(Amt1) && isa<ConstantSDNode>(Amt2)) { - // Replace this node with two shifts followed by a MOVSS/MOVSD/PBLEND. SDValue Splat1 = DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT); SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); SDValue Splat2 = DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT); SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); - SDValue BitCast1 = DAG.getBitcast(MVT::v4i32, Shift1); - SDValue BitCast2 = DAG.getBitcast(MVT::v4i32, Shift2); - if (UseMOVSD) - return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl, BitCast1, - BitCast2, {0, 1, 6, 7})); - return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl, BitCast1, - BitCast2, {0, 5, 6, 7})); + return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask); } } |