From 8c19925f42901db51dbbf3e3a7b1a789cd2bc646 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 1 Oct 2019 23:18:31 +0000 Subject: [X86] Add a DAG combine to shrink vXi64 gather/scatter indices that are constant with sufficient sign bits to fit in vXi32 The gather/scatter instructions can implicitly sign extend the indices. If we're operating on 32-bit data, a v16i64 index can force a v16i32 gather to be split in two since the index needs 2 registers. If we can shrink the index to i32 we can avoid the split. It should always be safe to shrink the index regardless of the number of elements. We have gather/scatter instructions that can use a v2i32 index stored in a v4i32 register with v2i64 data size. I've limited this to before legalize types to avoid creating a v2i32 after type legalization. We could check for it, but we'd also need testing. I'm also only handling build_vectors with no bitcasts to be sure the truncate will constant fold. Differential Revision: https://reviews.llvm.org/D68247 llvm-svn: 373408 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp') diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 58398df2059..8c837dfb6af 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42442,6 +42442,40 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, SDValue Base = GorS->getBasePtr(); SDValue Scale = GorS->getScale(); + // Shrink constant indices if they are larger than 32-bits. + // Only do this before legalize types since v2i64 could become v2i32. + // FIXME: We could check that the type is legal if we're after legalize types, + // but then we would need to construct test cases where that happens. + // FIXME: We could support more than just constant vectors, but we need to + // careful with costing. 
A truncate that can be optimized out would be fine. + // Otherwise we might only want to create a truncate if it avoids a split. + if (DCI.isBeforeLegalize()) { + if (auto *BV = dyn_cast(Index)) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); + if (BV->isConstant() && IndexWidth > 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); + if (auto *Gather = dyn_cast(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale } ; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); + } + auto *Scatter = cast(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); + } + } + } + if (DCI.isBeforeLegalizeOps()) { // Remove any sign extends from 32 or smaller to larger than 32. // Only do this before LegalizeOps in case we need the sign extend for -- cgit v1.2.3