author    Piotr Sobczak <Piotr.Sobczak@amd.com>  2019-11-12 11:46:54 +0100
committer Piotr Sobczak <Piotr.Sobczak@amd.com>  2019-11-15 15:01:15 +0100
commit    02419ab5c73935bed7aef5fc43e06c6b5f37fc04 (patch)
tree      bf524fd615220c4821bba4a7266f1c36ef56de0a /llvm/lib
parent    3d708bf5c2672cae01e5ecb0ed1877e3d56ee451 (diff)
[AMDGPU] Lower llvm.amdgcn.s.buffer.load.v3[i|f]32
Summary: Add lowering support for the 32-bit vec3 variant of the s.buffer.load intrinsic.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70118
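For reference, a minimal IR sketch of the kind of call this lowering handles (function and value names here are illustrative only, not taken from the patch or its tests):

define amdgpu_ps <3 x float> @sample_v3f32(<4 x i32> inreg %rsrc, i32 inreg %off) {
  ; 32-bit vec3 s.buffer.load: lowered by widening the load to vec4 and
  ; extracting the first three elements (see the SIISelLowering.cpp hunk below).
  %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %off, i32 0)
  ret <3 x float> %val
}

declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32)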
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 30
1 file changed, 24 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e26f0e3b611..85af397228e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5659,11 +5659,16 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDValue Offset, SDValue GLC, SDValue DLC,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+
+ const DataLayout &DataLayout = DAG.getDataLayout();
+ unsigned Align =
+ DataLayout.getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
- VT.getStoreSize(), VT.getStoreSize());
+ VT.getStoreSize(), Align);
if (!Offset->isDivergent()) {
SDValue Ops[] = {
@@ -5672,6 +5677,20 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
GLC,
DLC,
};
+
+ // Widen vec3 load to vec4.
+ if (VT.isVector() && VT.getVectorNumElements() == 3) {
+ EVT WidenedVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
+ auto WidenedOp = DAG.getMemIntrinsicNode(
+ AMDGPUISD::SBUFFER_LOAD, DL, DAG.getVTList(WidenedVT), Ops, WidenedVT,
+ MF.getMachineMemOperand(MMO, 0, WidenedVT.getStoreSize()));
+ auto Subvector = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, WidenedOp,
+ DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ return Subvector;
+ }
+
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
DAG.getVTList(VT), Ops, VT, MMO);
}
@@ -5683,11 +5702,10 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
MVT LoadVT = VT.getSimpleVT();
unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
assert((LoadVT.getScalarType() == MVT::i32 ||
- LoadVT.getScalarType() == MVT::f32) &&
- isPowerOf2_32(NumElts));
+ LoadVT.getScalarType() == MVT::f32));
if (NumElts == 8 || NumElts == 16) {
- NumLoads = NumElts == 16 ? 4 : 2;
+ NumLoads = NumElts / 4;
LoadVT = MVT::v4i32;
}
@@ -5711,8 +5729,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
- Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
- Ops, LoadVT, MMO));
+ Loads.push_back(getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops,
+ LoadVT, MMO, DAG));
}
if (VT == MVT::v8i32 || VT == MVT::v16i32)