diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2017-03-21 13:50:33 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2017-03-21 13:50:33 +0000 |
| commit | 79379cae157df2c9a840810fb2d432d8d46d7ef5 (patch) | |
| tree | 54e0b462ec143428315ef1f20c0fd32408be9000 /llvm/lib/Target | |
| parent | e3ad2e2e73983966b5b84293a0f501e409abddde (diff) | |
| download | bcm5719-llvm-79379cae157df2c9a840810fb2d432d8d46d7ef5.tar.gz bcm5719-llvm-79379cae157df2c9a840810fb2d432d8d46d7ef5.zip | |
[x86] use PMOVMSK for vector-sized equality comparisons
We could do better by splitting any oversized type into whatever vector size the target supports,
but I left that for future work if it ever comes up. The motivating case is memcmp() calls on 16-byte
structs, so I think we can wire that up with a TLI hook that feeds into this.
Differential Revision: https://reviews.llvm.org/D31156
llvm-svn: 298376
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2165e08fa25..6a7c6d59db9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34022,6 +34022,47 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
+                                               const X86Subtarget &Subtarget) {
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+  // We're looking for an oversized integer equality comparison, but ignore a
+  // comparison with zero because that gets special treatment in EmitTest().
+  SDValue X = SetCC->getOperand(0);
+  SDValue Y = SetCC->getOperand(1);
+  EVT OpVT = X.getValueType();
+  unsigned OpSize = OpVT.getSizeInBits();
+  if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
+    return SDValue();
+
+  // TODO: Use PXOR + PTEST for SSE4.1 or later?
+  // TODO: Add support for AVX-512.
+  EVT VT = SetCC->getValueType(0);
+  SDLoc DL(SetCC);
+  if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+      (OpSize == 256 && Subtarget.hasAVX2())) {
+    EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
+    SDValue VecX = DAG.getBitcast(VecVT, X);
+    SDValue VecY = DAG.getBitcast(VecVT, Y);
+
+    // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
+    // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
+    // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
+    // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
+    // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
+    SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
+    SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
+    SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
+                                    MVT::i32);
+    return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
+  }
+
+  return SDValue();
+}
+
 static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                             const X86Subtarget &Subtarget) {
   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
@@ -34046,6 +34087,9 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
     }
+
+    if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
+      return V;
   }
 
   if (VT.getScalarType() == MVT::i1 &&

