summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp112
1 files changed, 67 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0815f830ccc..7745e6c4ce9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8762,63 +8762,85 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- unsigned ZMask = 0;
- int V1DstIndex = -1;
- int V2DstIndex = -1;
- bool V1UsedInPlace = false;
- for (int i = 0; i < 4; ++i) {
- // Synthesize a zero mask from the zeroable elements (includes undefs).
- if (Zeroable[i]) {
- ZMask |= 1 << i;
- continue;
- }
+ // Attempt to match INSERTPS with one element from VA or VB being
+ // inserted into VA (or undef). If successful, V1, V2 and InsertPSMask
+ // are updated.
+ auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
+ ArrayRef<int> CandidateMask) {
+ unsigned ZMask = 0;
+ int VADstIndex = -1;
+ int VBDstIndex = -1;
+ bool VAUsedInPlace = false;
+
+ for (int i = 0; i < 4; ++i) {
+ // Synthesize a zero mask from the zeroable elements (includes undefs).
+ if (Zeroable[i]) {
+ ZMask |= 1 << i;
+ continue;
+ }
- // Flag if we use any V1 inputs in place.
- if (i == Mask[i]) {
- V1UsedInPlace = true;
- continue;
+ // Flag if we use any VA inputs in place.
+ if (i == CandidateMask[i]) {
+ VAUsedInPlace = true;
+ continue;
+ }
+
+ // We can only insert a single non-zeroable element.
+ if (VADstIndex >= 0 || VBDstIndex >= 0)
+ return false;
+
+ if (CandidateMask[i] < 4) {
+ // VA input out of place for insertion.
+ VADstIndex = i;
+ } else {
+ // VB input for insertion.
+ VBDstIndex = i;
+ }
}
- // We can only insert a single non-zeroable element.
- if (V1DstIndex >= 0 || V2DstIndex >= 0)
+ // Don't bother if we have no (non-zeroable) element for insertion.
+ if (VADstIndex < 0 && VBDstIndex < 0)
return false;
- if (Mask[i] < 4) {
- // V1 input out of place for insertion.
- V1DstIndex = i;
+ // Determine element insertion src/dst indices. The src index is from the
+ // start of the inserted vector, not the start of the concatenated vector.
+ unsigned VBSrcIndex = 0;
+ if (VADstIndex >= 0) {
+ // If we have a VA input out of place, we use VA as the V2 element
+ // insertion and don't use the original V2 at all.
+ VBSrcIndex = CandidateMask[VADstIndex];
+ VBDstIndex = VADstIndex;
+ VB = VA;
} else {
- // V2 input for insertion.
- V2DstIndex = i;
+ VBSrcIndex = CandidateMask[VBDstIndex] - 4;
}
- }
- // Don't bother if we have no (non-zeroable) element for insertion.
- if (V1DstIndex < 0 && V2DstIndex < 0)
- return false;
+ // If no V1 inputs are used in place, then the result is created only from
+ // the zero mask and the V2 insertion - so remove V1 dependency.
+ if (!VAUsedInPlace)
+ VA = DAG.getUNDEF(MVT::v4f32);
- // Determine element insertion src/dst indices. The src index is from the
- // start of the inserted vector, not the start of the concatenated vector.
- unsigned V2SrcIndex = 0;
- if (V1DstIndex >= 0) {
- // If we have a V1 input out of place, we use V1 as the V2 element insertion
- // and don't use the original V2 at all.
- V2SrcIndex = Mask[V1DstIndex];
- V2DstIndex = V1DstIndex;
- V2 = V1;
- } else {
- V2SrcIndex = Mask[V2DstIndex] - 4;
- }
+ // Update V1, V2 and InsertPSMask accordingly.
+ V1 = VA;
+ V2 = VB;
- // If no V1 inputs are used in place, then the result is created only from
- // the zero mask and the V2 insertion - so remove V1 dependency.
- if (!V1UsedInPlace)
- V1 = DAG.getUNDEF(MVT::v4f32);
+ // Insert the V2 element into the desired position.
+ InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
+ assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
+ return true;
+ };
- // Insert the V2 element into the desired position.
- InsertPSMask = V2SrcIndex << 6 | V2DstIndex << 4 | ZMask;
- assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
- return true;
+ if (matchAsInsertPS(V1, V2, Mask))
+ return true;
+
+ // Commute and try again.
+ SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+ if (matchAsInsertPS(V2, V1, CommutedMask))
+ return true;
+
+ return false;
}
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
OpenPOWER on IntegriCloud