| author | Sanjay Patel <spatel@rotateright.com> | 2018-10-09 14:04:14 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-09 14:04:14 +0000 |
| commit | f5fac1826a867d5e65310456670cc1d5a495ed02 (patch) | |
| tree | 8b91cb196a284c34941c1b8346d3b386adeecf10 /llvm/lib/Target/X86 | |
| parent | 0b24a86f6322d1fa3e458b8114fa9a88f39ea360 (diff) | |
| download | bcm5719-llvm-f5fac1826a867d5e65310456670cc1d5a495ed02.tar.gz bcm5719-llvm-f5fac1826a867d5e65310456670cc1d5a495ed02.zip | |
[x86] use demanded bits to simplify masked store codegen
As noted in D52747, if we prefer that IR use trunc for bool vectors rather
than and+icmp, we can expose codegen shortcomings, as seen here with masked store.
Replace a hard-coded PCMPGT simplification with the more general demanded-bits
call to improve the generated code.
Differential Revision: https://reviews.llvm.org/D52964
llvm-svn: 344048
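
The key hardware fact behind this change: AVX masked-store instructions (vmaskmovps/vpmaskmovd) consult only the most significant bit of each mask element. The sketch below (plain C++, not LLVM code; all names are illustrative) models that behavior to show why a mask computed as (0 > X) selects exactly the same lanes as using X directly, which is the equivalence the demanded-bits call now exploits for arbitrary mask expressions.

```cpp
#include <cstdint>
#include <cstdio>

// Model of an AVX-style masked store: only the sign bit of each 32-bit
// mask lane decides whether that lane is written (as vmaskmovps does).
void maskedStore(int32_t *dst, const int32_t *val, const int32_t *mask, int n) {
  for (int i = 0; i < n; ++i)
    if (mask[i] < 0) // sign bit set
      dst[i] = val[i];
}

int main() {
  int32_t x[4] = {-5, 7, -1, 0};
  int32_t val[4] = {10, 20, 30, 40};
  int32_t cmp[4], viaCmp[4] = {}, viaX[4] = {};

  // Mask built the old way: pcmpgt 0, X (all-ones lane where X is negative).
  for (int i = 0; i < 4; ++i)
    cmp[i] = (0 > x[i]) ? -1 : 0;

  maskedStore(viaCmp, val, cmp, 4); // mstore val, ptr, (pcmpgt 0, X)
  maskedStore(viaX, val, x, 4);     // mstore val, ptr, X

  // Both stores write lanes 0 and 2 only; the compare is redundant because
  // only the sign bit of each mask lane is demanded.
  for (int i = 0; i < 4; ++i)
    printf("%d %d\n", (int)viaCmp[i], (int)viaX[i]);
  return 0;
}
```

The previous combine recognized only a literal PCMPGT-against-zero mask; routing the mask through SimplifyDemandedBits lets any mask expression whose sign bits already match be simplified the same way, including the patterns produced when IR uses trunc for bool vectors.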
Diffstat (limited to 'llvm/lib/Target/X86')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 file changed, 16 insertions, 17 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 76d5f8e9d31..2256fe02696 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36521,31 +36521,31 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
 }
 
 static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
+                                  TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
   MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
-
   if (Mst->isCompressingStore())
     return SDValue();
 
+  EVT VT = Mst->getValue().getValueType();
   if (!Mst->isTruncatingStore()) {
     if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG))
       return ScalarStore;
 
-    // If the mask is checking (0 > X), we're creating a vector with all-zeros
-    // or all-ones elements based on the sign bits of X. AVX1 masked store only
-    // cares about the sign bit of each mask element, so eliminate the compare:
-    // mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X
-    // Note that by waiting to match an x86-specific PCMPGT node, we're
-    // eliminating potentially more complex matching of a setcc node which has
-    // a full range of predicates.
+    // If the mask value has been legalized to a non-boolean vector, try to
+    // simplify ops leading up to it. We only demand the MSB of each lane.
     SDValue Mask = Mst->getMask();
-    if (Mask.getOpcode() == X86ISD::PCMPGT &&
-        ISD::isBuildVectorAllZeros(Mask.getOperand(0).getNode())) {
-      assert(Mask.getValueType() == Mask.getOperand(1).getValueType() &&
-             "Unexpected type for PCMPGT");
-      return DAG.getMaskedStore(
-          Mst->getChain(), SDLoc(N), Mst->getValue(), Mst->getBasePtr(),
-          Mask.getOperand(1), Mst->getMemoryVT(), Mst->getMemOperand());
+    if (Mask.getScalarValueSizeInBits() != 1) {
+      TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+                                            !DCI.isBeforeLegalizeOps());
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
+      KnownBits Known;
+      if (TLI.SimplifyDemandedBits(Mask, DemandedMask, Known, TLO)) {
+        DCI.AddToWorklist(Mask.getNode());
+        DCI.CommitTargetLoweringOpt(TLO);
+        return SDValue(N, 0);
+      }
     }
 
     // TODO: AVX512 targets should also be able to simplify something like the
@@ -36556,7 +36556,6 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   }
 
   // Resolve truncating stores.
-  EVT VT = Mst->getValue().getValueType();
   unsigned NumElems = VT.getVectorNumElements();
   EVT StVT = Mst->getMemoryVT();
   SDLoc dl(Mst);
@@ -40382,7 +40381,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::LOAD:       return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:      return combineMaskedLoad(N, DAG, DCI, Subtarget);
   case ISD::STORE:      return combineStore(N, DAG, Subtarget);
-  case ISD::MSTORE:     return combineMaskedStore(N, DAG, Subtarget);
+  case ISD::MSTORE:     return combineMaskedStore(N, DAG, DCI, Subtarget);
   case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, Subtarget);
   case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD:
```
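
For intuition about what the SimplifyDemandedBits call buys over the old special case, here is a toy sketch (plain C++, not the LLVM API; the node kinds and function are invented for illustration) of a demanded-bits style fold: when only the sign bit of each lane is demanded, any node that preserves its operand's sign bit can be looked through, of which the (0 > X) compare is just one instance.

```cpp
#include <cassert>
#include <cstdint>

// Toy expression node: a plain value, a lanewise (0 > X) compare that
// produces all-ones/all-zeros, or an AND with a constant.
struct Node {
  enum Kind { Value, CmpGtZero, AndConst } kind;
  Node *operand = nullptr; // for CmpGtZero / AndConst
  uint32_t constant = 0;   // for AndConst
};

// Return a simpler node that computes the same demanded bits, or the node
// itself if no simplification applies.
Node *simplifyDemandedBits(Node *n, uint32_t demanded) {
  switch (n->kind) {
  case Node::CmpGtZero:
    // (0 > X) is all-ones iff X's sign bit is set, so the result's sign bit
    // equals X's; if only the sign bit is demanded, use X directly.
    if (demanded == 0x80000000u)
      return simplifyDemandedBits(n->operand, demanded);
    break;
  case Node::AndConst:
    // If the constant keeps every demanded bit, the AND is a no-op here.
    if ((n->constant & demanded) == demanded)
      return simplifyDemandedBits(n->operand, demanded);
    break;
  case Node::Value:
    break;
  }
  return n;
}

int main() {
  Node x{Node::Value};
  Node cmp{Node::CmpGtZero, &x};
  // Demanding only the sign bit lets the compare be bypassed entirely,
  // mirroring: mstore val, ptr, (pcmpgt 0, X) --> mstore val, ptr, X.
  assert(simplifyDemandedBits(&cmp, 0x80000000u) == &x);
  return 0;
}
```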

