summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp181
1 files changed, 169 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c73e6b57ee0..0248b5878b7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5158,6 +5158,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@@ -5165,10 +5172,57 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
- Op.getOperand(7), // dfmt
- Op.getOperand(8), // nfmt
- Op.getOperand(9), // glc
- Op.getOperand(10) // slc
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_raw_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // format
+ Op.getOperand(6), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idxen
+ };
+
+ if (LoadVT.getScalarType() == MVT::f16)
+ return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+ M, DAG, Ops);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+ Op->getVTList(), Ops, LoadVT,
+ M->getMemOperand());
+ }
+ case Intrinsic::amdgcn_struct_tbuffer_load: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ EVT LoadVT = Op.getValueType();
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -5407,6 +5461,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto Opcode = NumChannels->getZExtValue() == 3 ?
AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(12))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(13))->getZExtValue();
SDValue Ops[] = {
Chain,
Op.getOperand(3), // vdata
@@ -5415,10 +5473,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
VOffset,
Op.getOperand(6), // soffset
Op.getOperand(7), // inst_offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(12), // glc
- Op.getOperand(13), // slc
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn->isOne(), DL, MVT::i1), // idxen
};
assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 &&
@@ -5438,6 +5495,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+ unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+ unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+ unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+ unsigned IdxEn = 1;
+ if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+ IdxEn = Idx->getZExtValue() != 0;
SDValue Ops[] = {
Chain,
VData, // vdata
@@ -5446,10 +5510,59 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // voffset
Op.getOperand(6), // soffset
Op.getOperand(7), // offset
- Op.getOperand(8), // dfmt
- Op.getOperand(9), // nfmt
- Op.getOperand(10), // glc
- Op.getOperand(11) // slc
+ DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+ DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+ DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
+ };
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_struct_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // format
+ Op.getOperand(8), // cachepolicy
+ DAG.getConstant(1, DL, MVT::i1), // idexen
+ };
+ unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+ AMDGPUISD::TBUFFER_STORE_FORMAT;
+ MemSDNode *M = cast<MemSDNode>(Op);
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+ M->getMemoryVT(), M->getMemOperand());
+ }
+
+ case Intrinsic::amdgcn_raw_tbuffer_store: {
+ SDValue VData = Op.getOperand(2);
+ bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+ if (IsD16)
+ VData = handleD16VData(VData, DAG);
+ auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+ SDValue Ops[] = {
+ Chain,
+ VData, // vdata
+ Op.getOperand(3), // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy
+ DAG.getConstant(0, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -5490,6 +5603,50 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
}
+// The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
+// (the offset that is included in bounds checking and swizzling, to be split
+// between the instruction's voffset and immoffset fields) and soffset (the
+// offset that is excluded from bounds checking and swizzling, to go in the
+// instruction's soffset field). This function takes the first kind of offset
+// and figures out how to split it between voffset and immoffset.
+std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
+ SDValue Offset, SelectionDAG &DAG) const {
+ SDLoc DL(Offset);
+ const unsigned MaxImm = 4095;
+ SDValue N0 = Offset;
+ ConstantSDNode *C1 = nullptr;
+ if (N0.getOpcode() == ISD::ADD) {
+ if ((C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))))
+ N0 = N0.getOperand(0);
+ } else if ((C1 = dyn_cast<ConstantSDNode>(N0)))
+ N0 = SDValue();
+
+ if (C1) {
+ unsigned ImmOffset = C1->getZExtValue();
+ // If the immediate value is too big for the immoffset field, put the value
+ // mod 4096 into the immoffset field so that the value that is copied/added
+ // for the voffset field is a multiple of 4096, and it stands more chance
+ // of being CSEd with the copy/add for another similar load/store.
+ unsigned Overflow = ImmOffset & ~MaxImm;
+ ImmOffset -= Overflow;
+ C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+ if (Overflow) {
+ auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
+ if (!N0)
+ N0 = OverflowVal;
+ else {
+ SDValue Ops[] = { N0, OverflowVal };
+ N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
+ }
+ }
+ }
+ if (!N0)
+ N0 = DAG.getConstant(0, DL, MVT::i32);
+ if (!C1)
+ C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+ return {N0, SDValue(C1, 0)};
+}
+
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
OpenPOWER on IntegriCloud