diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-31 13:51:10 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-31 13:51:10 +0000 |
| commit | c29eab52e886b361fbb319b56e95cf328e55ccd4 (patch) | |
| tree | 5fce73831720ab7665c86500831e692bc41dcaed /llvm/lib/Target | |
| parent | 2f2a6ab991c22763fd25714c01988cb2ef156d50 (diff) | |
| download | bcm5719-llvm-c29eab52e886b361fbb319b56e95cf328e55ccd4.tar.gz bcm5719-llvm-c29eab52e886b361fbb319b56e95cf328e55ccd4.zip | |
[X86][SSE] Add support for combining PINSRW into a target shuffle.
Also add the ability to recognise PINSRW(Vec, 0, Idx).
Target shuffle combines won't replace multiple insertions with a bit mask until a depth of 3 or more, so we avoid code size bloat.
The unnecessary vpblendw in clearupper8xi16a will be fixed in an upcoming patch.
llvm-svn: 293627
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 files changed, 31 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a05c7f58e9e..6b235d08ce3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5770,12 +5770,21 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask, return true; } case X86ISD::PINSRW: { - // Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern. - // TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc. SDValue InVec = N.getOperand(0); SDValue InScl = N.getOperand(1); uint64_t InIdx = N.getConstantOperandVal(2); assert(InIdx < NumElts && "Illegal insertion index"); + + // Attempt to recognise a PINSRW(VEC, 0, Idx) shuffle pattern. + if (X86::isZeroNode(InScl)) { + Ops.push_back(InVec); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i == InIdx ? SM_SentinelZero : i); + return true; + } + + // Attempt to recognise a PINSRW(ASSERTZEXT(PEXTRW)) shuffle pattern. + // TODO: Expand this to support PINSRB/INSERT_VECTOR_ELT/etc. if (InScl.getOpcode() != ISD::AssertZext || InScl.getOperand(0).getOpcode() != X86ISD::PEXTRW) return false; @@ -30597,6 +30606,24 @@ static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + unsigned Opcode = N->getOpcode(); + assert(((X86ISD::PINSRB == Opcode && N->getValueType(0) ==MVT::v16i8) || + (X86ISD::PINSRW == Opcode && N->getValueType(0) ==MVT::v8i16)) && + "Unexpected vector insertion"); + + // Attempt to combine PINSRB/PINSRW patterns to a shuffle. + SDValue Op(N, 0); + SmallVector<int, 1> NonceMask; // Just a placeholder. + NonceMask.push_back(0); + combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + /*Depth*/ 1, /*HasVarMask*/ false, DAG, + DCI, Subtarget); + return SDValue(); +} + /// Recognize the distinctive (AND (setcc ...) 
(setcc ..)) where both setccs /// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for /// OR -> CMPNEQSS. @@ -34159,6 +34186,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget); case X86ISD::VSEXT: case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget); + case X86ISD::PINSRB: + case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: case X86ISD::PALIGNR: |

