diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 115 |
1 files changed, 64 insertions, 51 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d62bf919d79..1606fb8f108 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39587,61 +39587,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, return Ret; } -static SDValue combineOr(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineOrShiftToFunnelShift(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + assert(N->getOpcode() == ISD::OR && "Expected ISD::OR node"); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - // If this is SSE1 only convert to FOR to avoid scalarization. - if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { - return DAG.getBitcast(MVT::v4i32, - DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32, - DAG.getBitcast(MVT::v4f32, N0), - DAG.getBitcast(MVT::v4f32, N1))); - } - - // Match any-of bool scalar reductions into a bitcast/movmsk + cmp. - // TODO: Support multiple SrcOps. - if (VT == MVT::i1) { - SmallVector<SDValue, 2> SrcOps; - if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) && - SrcOps.size() == 1) { - SDLoc dl(N); - unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); - EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); - SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); - if (Mask) { - APInt AllBits = APInt::getNullValue(NumElts); - return DAG.getSetCC(dl, MVT::i1, Mask, - DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE); - } - } - } - - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) - return R; - - if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) - return FPLogic; - - if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget)) - return R; - - if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget)) - return R; - - // Attempt to recursively combine an OR of shuffles. - if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { - SDValue Op(N, 0); - if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) - return Res; - } - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); @@ -39764,6 +39716,67 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineOr(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // If this is SSE1 only convert to FOR to avoid scalarization. + if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { + return DAG.getBitcast(MVT::v4i32, + DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32, + DAG.getBitcast(MVT::v4f32, N0), + DAG.getBitcast(MVT::v4f32, N1))); + } + + // Match any-of bool scalar reductions into a bitcast/movmsk + cmp. + // TODO: Support multiple SrcOps. + if (VT == MVT::i1) { + SmallVector<SDValue, 2> SrcOps; + if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) && + SrcOps.size() == 1) { + SDLoc dl(N); + unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); + EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); + SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); + if (Mask) { + APInt AllBits = APInt::getNullValue(NumElts); + return DAG.getSetCC(dl, MVT::i1, Mask, + DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE); + } + } + } + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) + return R; + + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + + if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget)) + return R; + + if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget)) + return R; + + if (SDValue R = combineOrShiftToFunnelShift(N, DAG, Subtarget)) + return R; + + // Attempt to recursively combine an OR of shuffles. + if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { + SDValue Op(N, 0); + if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) + return Res; + } + + return SDValue(); +} + /// Try to turn tests against the signbit in the form of: /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1) /// into: |