diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 112 | 
1 files changed, 67 insertions, 45 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0815f830ccc..7745e6c4ce9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8762,63 +8762,85 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,    assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");    assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");    assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); -  unsigned ZMask = 0; -  int V1DstIndex = -1; -  int V2DstIndex = -1; -  bool V1UsedInPlace = false; -  for (int i = 0; i < 4; ++i) { -    // Synthesize a zero mask from the zeroable elements (includes undefs). -    if (Zeroable[i]) { -      ZMask |= 1 << i; -      continue; -    } +  // Attempt to match INSERTPS with one element from VA or VB being +  // inserted into VA (or undef). If successful, V1, V2 and InsertPSMask +  // are updated. +  auto matchAsInsertPS = [&](SDValue VA, SDValue VB, +                             ArrayRef<int> CandidateMask) { +    unsigned ZMask = 0; +    int VADstIndex = -1; +    int VBDstIndex = -1; +    bool VAUsedInPlace = false; + +    for (int i = 0; i < 4; ++i) { +      // Synthesize a zero mask from the zeroable elements (includes undefs). +      if (Zeroable[i]) { +        ZMask |= 1 << i; +        continue; +      } -    // Flag if we use any V1 inputs in place. -    if (i == Mask[i]) { -      V1UsedInPlace = true; -      continue; +      // Flag if we use any VA inputs in place. +      if (i == CandidateMask[i]) { +        VAUsedInPlace = true; +        continue; +      } + +      // We can only insert a single non-zeroable element. +      if (VADstIndex >= 0 || VBDstIndex >= 0) +        return false; + +      if (CandidateMask[i] < 4) { +        // VA input out of place for insertion. +        VADstIndex = i; +      } else { +        // VB input for insertion. +        VBDstIndex = i; +      }      } -    // We can only insert a single non-zeroable element. -    if (V1DstIndex >= 0 || V2DstIndex >= 0) +    // Don't bother if we have no (non-zeroable) element for insertion. +    if (VADstIndex < 0 && VBDstIndex < 0)        return false; -    if (Mask[i] < 4) { -      // V1 input out of place for insertion. -      V1DstIndex = i; +    // Determine element insertion src/dst indices. The src index is from the +    // start of the inserted vector, not the start of the concatenated vector. +    unsigned VBSrcIndex = 0; +    if (VADstIndex >= 0) { +      // If we have a VA input out of place, we use VA as the V2 element +      // insertion and don't use the original V2 at all. +      VBSrcIndex = CandidateMask[VADstIndex]; +      VBDstIndex = VADstIndex; +      VB = VA;      } else { -      // V2 input for insertion. -      V2DstIndex = i; +      VBSrcIndex = CandidateMask[VBDstIndex] - 4;      } -  } -  // Don't bother if we have no (non-zeroable) element for insertion. -  if (V1DstIndex < 0 && V2DstIndex < 0) -    return false; +    // If no V1 inputs are used in place, then the result is created only from +    // the zero mask and the V2 insertion - so remove V1 dependency. +    if (!VAUsedInPlace) +      VA = DAG.getUNDEF(MVT::v4f32); -  // Determine element insertion src/dst indices. The src index is from the -  // start of the inserted vector, not the start of the concatenated vector. -  unsigned V2SrcIndex = 0; -  if (V1DstIndex >= 0) { -    // If we have a V1 input out of place, we use V1 as the V2 element insertion -    // and don't use the original V2 at all. -    V2SrcIndex = Mask[V1DstIndex]; -    V2DstIndex = V1DstIndex; -    V2 = V1; -  } else { -    V2SrcIndex = Mask[V2DstIndex] - 4; -  } +    // Update V1, V2 and InsertPSMask accordingly. +    V1 = VA; +    V2 = VB; -  // If no V1 inputs are used in place, then the result is created only from -  // the zero mask and the V2 insertion - so remove V1 dependency. -  if (!V1UsedInPlace) -    V1 = DAG.getUNDEF(MVT::v4f32); +    // Insert the V2 element into the desired position. +    InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask; +    assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); +    return true; +  }; -  // Insert the V2 element into the desired position. -  InsertPSMask = V2SrcIndex << 6 | V2DstIndex << 4 | ZMask; -  assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); -  return true; +  if (matchAsInsertPS(V1, V2, Mask)) +    return true; + +  // Commute and try again. +  SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end()); +  ShuffleVectorSDNode::commuteMask(CommutedMask); +  if (matchAsInsertPS(V2, V1, CommutedMask)) +    return true; + +  return false;  }  static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1, | 

