summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp33
1 files changed, 29 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 687879a46fa..1c87adda820 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4317,6 +4317,11 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
return (Val < 0 || Val == CmpVal);
}
+/// Val is either the undef or zero sentinel value.
+static bool isUndefOrZero(int Val) {
+ return (Val == SM_SentinelUndef || Val == SM_SentinelZero);
+}
+
/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range (Low, Low+Size]. or is undef.
@@ -23989,6 +23994,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
SDValue Op1 = N.getOperand(1);
SDValue Op2 = N.getOperand(2);
unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
+ unsigned SrcIdx = (InsertPSMask >> 6) & 0x3;
unsigned DstIdx = (InsertPSMask >> 4) & 0x3;
unsigned ZeroMask = InsertPSMask & 0xF;
@@ -24002,19 +24008,38 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
- SmallVector<int, 8> TargetMask;
- if (!setTargetShuffleZeroElements(Op0, TargetMask))
+ // Attempt to merge insertps Op1 with an inner target shuffle node.
+ SmallVector<int, 8> TargetMask1;
+ if (setTargetShuffleZeroElements(Op1, TargetMask1)) {
+ int M = TargetMask1[SrcIdx];
+ if (isUndefOrZero(M)) {
+ // Zero/UNDEF insertion - zero out element and remove dependency.
+ InsertPSMask |= (1u << DstIdx);
+ return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
+ DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ }
+ // Update insertps mask srcidx and reference the source input directly.
+ assert(0 <= M && M < 8 && "Shuffle index out of range");
+ InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
+ Op1 = Op1.getOperand(M < 4 ? 0 : 1);
+ return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
+ DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ }
+
+ // Attempt to merge insertps Op0 with an inner target shuffle node.
+ SmallVector<int, 8> TargetMask0;
+ if (!setTargetShuffleZeroElements(Op0, TargetMask0))
return SDValue();
bool Updated = false;
bool UseInput00 = false;
bool UseInput01 = false;
for (int i = 0; i != 4; ++i) {
- int M = TargetMask[i];
+ int M = TargetMask0[i];
if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) {
// No change if element is already zero or the inserted element.
continue;
- } else if (M < 0) {
+ } else if (isUndefOrZero(M)) {
// If the target mask is undef/zero then we must zero the element.
InsertPSMask |= (1u << i);
Updated = true;
OpenPOWER on IntegriCloud