diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 43 |
1 file changed, 42 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1b91d743641..69401f455b4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7097,8 +7097,11 @@ SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N, SDValue SITargetLowering::performExtractVectorEltCombine( SDNode *N, DAGCombinerInfo &DCI) const { SDValue Vec = N->getOperand(0); - SelectionDAG &DAG = DCI.DAG; + + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + if ((Vec.getOpcode() == ISD::FNEG || Vec.getOpcode() == ISD::FABS) && allUsesHaveSourceMods(N)) { SDLoc SL(N); @@ -7139,6 +7142,44 @@ SDValue SITargetLowering::performExtractVectorEltCombine( Vec.getOperand(1), Idx)); } } + + if (!DCI.isBeforeLegalize()) + return SDValue(); + + unsigned VecSize = VecVT.getSizeInBits(); + unsigned EltSize = EltVT.getSizeInBits(); + + // Try to turn sub-dword accesses of vectors into accesses of the same 32-bit + // elements. This exposes more load reduction opportunities by replacing + // multiple small extract_vector_elements with a single 32-bit extract. 
+ auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (EltSize <= 16 && + EltVT.isByteSized() && + VecSize > 32 && + VecSize % 32 == 0 && + Idx) { + EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT); + + unsigned BitIndex = Idx->getZExtValue() * EltSize; + unsigned EltIdx = BitIndex / 32; + unsigned LeftoverBitIdx = BitIndex % 32; + SDLoc SL(N); + + SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec); + DCI.AddToWorklist(Cast.getNode()); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast, + DAG.getConstant(EltIdx, SL, MVT::i32)); + DCI.AddToWorklist(Elt.getNode()); + SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt, + DAG.getConstant(LeftoverBitIdx, SL, MVT::i32)); + DCI.AddToWorklist(Srl.getNode()); + + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, EltVT.changeTypeToInteger(), Srl); + DCI.AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::BITCAST, SL, EltVT, Trunc); + } + return SDValue(); } |

