diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 files changed, 16 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 76d5f8e9d31..2256fe02696 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36521,31 +36521,31 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
 }
 
 static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
   MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
-
   if (Mst->isCompressingStore())
     return SDValue();
 
+  EVT VT = Mst->getValue().getValueType();
   if (!Mst->isTruncatingStore()) {
     if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
       return ScalarStore;
 
-    // If the mask is checking (0 > X), we're creating a vector with all-zeros
-    // or all-ones elements based on the sign bits of X. AVX1 masked store only
-    // cares about the sign bit of each mask element, so eliminate the compare:
-    // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
-    // Note that by waiting to match an x86-specific PCMPGT node, we're
-    // eliminating potentially more complex matching of a setcc node which has
-    // a full range of predicates.
+    // If the mask value has been legalized to a non-boolean vector, try to
+    // simplify ops leading up to it. We only demand the MSB of each lane.
     SDValue Mask = Mst->getMask();
-    if (Mask.getOpcode() == X86ISD::PCMPGT &&
-        ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
-      assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
-             "Unexpected type for PCMPGT");
-      return DAG.getMaskedStore(
-          Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
-          Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+    if (Mask.getScalarValueSizeInBits() != 1) {
+      TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                            !DCI.isBeforeLegalizeOps());
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+      KnownBits Known;
+      if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) {
+        DCI.AddToWorklist(Mask.getNode());
+        DCI.CommitTargetLoweringOpt(TLO);
+        return SDValue(N, 0);
+      }
     }
 
     // TODO: AVX512 targets should also be able to simplify something like the
@@ -36556,7 +36556,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   }
 
   // Resolve truncating stores.
-  EVT VT = Mst->getValue().getValueType();
   unsigned NumElems = VT.getVectorNumElements();
   EVT StVT = Mst->getMemoryVT();
   SDLoc dl(Mst);
@@ -40382,7 +40381,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::LOAD:           return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return combineStore(N, DAG, Subtarget);
-  case ISD::MSTORE:         return combineMaskedStore(N, DAG, Subtarget);
+  case ISD::MSTORE:         return combineMaskedStore(N, DAG, DCI, Subtarget);
   case ISD::SINT_TO_FP:     return combineSIntToFP(N, DAG, Subtarget);
   case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD: