diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e63c1568d07..fa1a5301bb7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23733,6 +23733,52 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask, return true; } +/// Check a target shuffle mask's inputs to see if we can set any values to +/// SM_SentinelZero - this is for elements that are known to be zero +/// (not just zeroable) from their inputs. +/// \p Mask is filled by getTargetShuffleMask for \p N; elements that read an +/// undef input are then set to SM_SentinelUndef, and elements whose +/// BUILD_VECTOR source operand satisfies X86::isZeroNode are set to +/// SM_SentinelZero. +/// \returns false if \p N is not a target shuffle or getTargetShuffleMask +/// fails, true otherwise (even if no element was changed). +static bool setTargetShuffleZeroElements(SDValue N, + SmallVectorImpl<int> &Mask) { + bool IsUnary; + if (!isTargetShuffle(N.getOpcode())) + return false; + if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Mask, + IsUnary)) + return false; + + SDValue V1 = N.getOperand(0); + SDValue V2 = IsUnary ? V1 : N.getOperand(1); + + // Look through bitcasts to reach the nodes that define each input. + while (V1.getOpcode() == ISD::BITCAST) + V1 = V1->getOperand(0); + while (V2.getOpcode() == ISD::BITCAST) + V2 = V2->getOperand(0); + + for (int i = 0, Size = Mask.size(); i != Size; ++i) { + int M = Mask[i]; + + // Already decoded as SM_SentinelZero / SM_SentinelUndef. + if (M < 0) + continue; + + // Mask indices below Size select from V1, all others from V2. + SDValue V = M < Size ? V1 : V2; + + // We are referencing an UNDEF input. + if (V.isUndef()) { + Mask[i] = SM_SentinelUndef; + continue; + } + + // TODO - handle the Size != (int)V.getNumOperands() cases in future. + if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) + continue; + if (!X86::isZeroNode(V.getOperand(M % Size))) + continue; + Mask[i] = SM_SentinelZero; + } + + return true; +} + /// \brief Try to combine x86 target specific shuffles. static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -23806,6 +23852,96 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); } + // Attempt to merge blend(insertps(x,y),zero). 
+ if (V0.getOpcode() == X86ISD::INSERTPS || + V1.getOpcode() == X86ISD::INSERTPS) { + assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); + + // Determine which elements are known to be zero. + SmallVector<int, 8> TargetMask; + if (!setTargetShuffleZeroElements(N, TargetMask)) + return SDValue(); + + // Helper function to take inner insertps node and attempt to + // merge the blend with zero into its zero mask. + // Offset is the blend mask index that selects element 0 of V (0 for V0, + // 4 for V1). Returns an empty SDValue if the merge is not possible. + auto MergeInsertPSAndBlend = [&](SDValue V, int Offset) { + if (V.getOpcode() != X86ISD::INSERTPS) + return SDValue(); + SDValue Op0 = V.getOperand(0); + SDValue Op1 = V.getOperand(1); + SDValue Op2 = V.getOperand(2); + unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue(); + + // Check each element of the blend node's target mask - must either + // be zeroable (and update the zero mask) or select the element from + // the inner insertps node. + for (int i = 0; i != 4; ++i) + if (TargetMask[i] < 0) + InsertPSMask |= (1u << i); + else if (TargetMask[i] != (i + Offset)) + return SDValue(); + return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1, + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + }; + + if (SDValue V = MergeInsertPSAndBlend(V0, 0)) + return V; + if (SDValue V = MergeInsertPSAndBlend(V1, 4)) + return V; + } + return SDValue(); } + case X86ISD::INSERTPS: { + assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); + SDValue Op2 = N.getOperand(2); + unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue(); + unsigned DstIdx = (InsertPSMask >> 4) & 3; + + // Attempt to merge insertps with an inner target shuffle node. 
+ SmallVector<int, 8> TargetMask; + if (!setTargetShuffleZeroElements(Op0, TargetMask)) + return SDValue(); + + bool Updated = false; + bool UseInput00 = false; + bool UseInput01 = false; + for (int i = 0; i != 4; ++i) { + int M = TargetMask[i]; + if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { + // No change if element is already zero or the inserted element. + continue; + } else if (M < 0) { + // If the target mask is undef/zero then we must zero the element. + InsertPSMask |= (1u << i); + Updated = true; + continue; + } + + // The input vector element must be inline. + // That is, element i of the shuffle must read element i of an input. + if (M != i && M != (i + 4)) + return SDValue(); + + // Determine which inputs of the target shuffle we're using. + UseInput00 |= (0 <= M && M < 4); + UseInput01 |= (4 <= M); + } + + // If we're not using both inputs of the target shuffle then use the + // referenced input directly. + if (UseInput00 && !UseInput01) { + Updated = true; + Op0 = Op0.getOperand(0); + } else if (!UseInput00 && UseInput01) { + Updated = true; + Op0 = Op0.getOperand(1); + } + + // Only create a new node if the immediate or Op0 actually changed. + if (Updated) + return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1, + DAG.getConstant(InsertPSMask, DL, MVT::i8)); + return SDValue(); } default: @@ -28163,6 +28299,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::VZEXT: return performVZEXTCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles + case X86ISD::INSERTPS: case X86ISD::PALIGNR: case X86ISD::BLENDI: case X86ISD::UNPCKH: |