summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp43
1 files changed, 42 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1b91d743641..69401f455b4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7097,8 +7097,11 @@ SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
SDValue SITargetLowering::performExtractVectorEltCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Vec = N->getOperand(0);
-
SelectionDAG &DAG = DCI.DAG;
+
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+
if ((Vec.getOpcode() == ISD::FNEG ||
Vec.getOpcode() == ISD::FABS) && allUsesHaveSourceMods(N)) {
SDLoc SL(N);
@@ -7139,6 +7142,44 @@ SDValue SITargetLowering::performExtractVectorEltCombine(
Vec.getOperand(1), Idx));
}
}
+
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ unsigned VecSize = VecVT.getSizeInBits();
+ unsigned EltSize = EltVT.getSizeInBits();
+
+ // Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
+ // elements. This exposes more load reduction opportunities by replacing
+ // multiple small extract_vector_elements with a single 32-bit extract.
+ auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (EltSize <= 16 &&
+ EltVT.isByteSized() &&
+ VecSize > 32 &&
+ VecSize % 32 == 0 &&
+ Idx) {
+ EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
+
+ unsigned BitIndex = Idx->getZExtValue() * EltSize;
+ unsigned EltIdx = BitIndex / 32;
+ unsigned LeftoverBitIdx = BitIndex % 32;
+ SDLoc SL(N);
+
+ SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
+ DCI.AddToWorklist(Cast.getNode());
+
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
+ DAG.getConstant(EltIdx, SL, MVT::i32));
+ DCI.AddToWorklist(Elt.getNode());
+ SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
+ DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
+ DCI.AddToWorklist(Srl.getNode());
+
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, EltVT.changeTypeToInteger(), Srl);
+ DCI.AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::BITCAST, SL, EltVT, Trunc);
+ }
+
return SDValue();
}
OpenPOWER on IntegriCloud