Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 181
1 file changed, 169 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c73e6b57ee0..0248b5878b7 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5158,6 +5158,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     MemSDNode *M = cast<MemSDNode>(Op);
     EVT LoadVT = Op.getValueType();
 
+    unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
+    unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+    unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+    unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+    unsigned IdxEn = 1;
+    if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+      IdxEn = Idx->getZExtValue() != 0;
     SDValue Ops[] = {
       Op.getOperand(0),  // Chain
       Op.getOperand(2),  // rsrc
@@ -5165,10 +5172,57 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
       Op.getOperand(4),  // voffset
       Op.getOperand(5),  // soffset
       Op.getOperand(6),  // offset
-      Op.getOperand(7),  // dfmt
-      Op.getOperand(8),  // nfmt
-      Op.getOperand(9),  // glc
-      Op.getOperand(10)  // slc
+      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+    };
+
+    if (LoadVT.getScalarType() == MVT::f16)
+      return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+                                 M, DAG, Ops);
+    return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+                                   Op->getVTList(), Ops, LoadVT,
+                                   M->getMemOperand());
+  }
+  case Intrinsic::amdgcn_raw_tbuffer_load: {
+    MemSDNode *M = cast<MemSDNode>(Op);
+    EVT LoadVT = Op.getValueType();
+    auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
+
+    SDValue Ops[] = {
+      Op.getOperand(0),  // Chain
+      Op.getOperand(2),  // rsrc
+      DAG.getConstant(0, DL, MVT::i32), // vindex
+      Offsets.first,     // voffset
+      Op.getOperand(4),  // soffset
+      Offsets.second,    // offset
+      Op.getOperand(5),  // format
+      Op.getOperand(6),  // cachepolicy
+      DAG.getConstant(0, DL, MVT::i1), // idxen
+    };
+
+    if (LoadVT.getScalarType() == MVT::f16)
+      return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
+                                 M, DAG, Ops);
+    return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
+                                   Op->getVTList(), Ops, LoadVT,
+                                   M->getMemOperand());
+  }
+  case Intrinsic::amdgcn_struct_tbuffer_load: {
+    MemSDNode *M = cast<MemSDNode>(Op);
+    EVT LoadVT = Op.getValueType();
+    auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+
+    SDValue Ops[] = {
+      Op.getOperand(0),  // Chain
+      Op.getOperand(2),  // rsrc
+      Op.getOperand(3),  // vindex
+      Offsets.first,     // voffset
+      Op.getOperand(5),  // soffset
+      Offsets.second,    // offset
+      Op.getOperand(6),  // format
+      Op.getOperand(7),  // cachepolicy
+      DAG.getConstant(1, DL, MVT::i1), // idxen
     };
 
     if (LoadVT.getScalarType() == MVT::f16)
@@ -5407,6 +5461,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     auto Opcode = NumChannels->getZExtValue() == 3 ?
       AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT;
+    unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+    unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+    unsigned Glc = cast<ConstantSDNode>(Op.getOperand(12))->getZExtValue();
+    unsigned Slc = cast<ConstantSDNode>(Op.getOperand(13))->getZExtValue();
 
     SDValue Ops[] = {
       Chain,
       Op.getOperand(3),  // vdata
@@ -5415,10 +5473,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       VOffset,
       Op.getOperand(6),  // soffset
       Op.getOperand(7),  // inst_offset
-      Op.getOperand(8),  // dfmt
-      Op.getOperand(9),  // nfmt
-      Op.getOperand(12), // glc
-      Op.getOperand(13), // slc
+      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getConstant(IdxEn->isOne(), DL, MVT::i1), // idxen
     };
 
     assert((cast<ConstantSDNode>(Op.getOperand(14)))->getZExtValue() == 0 &&
@@ -5438,6 +5495,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
     bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
     if (IsD16)
       VData = handleD16VData(VData, DAG);
+    unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
+    unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
+    unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
+    unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
+    unsigned IdxEn = 1;
+    if (auto Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4)))
+      IdxEn = Idx->getZExtValue() != 0;
     SDValue Ops[] = {
       Chain,
       VData,             // vdata
@@ -5446,10 +5510,59 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       Op.getOperand(5),  // voffset
       Op.getOperand(6),  // soffset
       Op.getOperand(7),  // offset
-      Op.getOperand(8),  // dfmt
-      Op.getOperand(9),  // nfmt
-      Op.getOperand(10), // glc
-      Op.getOperand(11)  // slc
+      DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
+      DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
+      DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
+    };
+    unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+                           AMDGPUISD::TBUFFER_STORE_FORMAT;
+    MemSDNode *M = cast<MemSDNode>(Op);
+    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+                                   M->getMemoryVT(), M->getMemOperand());
+  }
+
+  case Intrinsic::amdgcn_struct_tbuffer_store: {
+    SDValue VData = Op.getOperand(2);
+    bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+    if (IsD16)
+      VData = handleD16VData(VData, DAG);
+    auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
+    SDValue Ops[] = {
+      Chain,
+      VData,             // vdata
+      Op.getOperand(3),  // rsrc
+      Op.getOperand(4),  // vindex
+      Offsets.first,     // voffset
+      Op.getOperand(6),  // soffset
+      Offsets.second,    // offset
+      Op.getOperand(7),  // format
+      Op.getOperand(8),  // cachepolicy
+      DAG.getConstant(1, DL, MVT::i1), // idxen
+    };
+    unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
+                           AMDGPUISD::TBUFFER_STORE_FORMAT;
+    MemSDNode *M = cast<MemSDNode>(Op);
+    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
+                                   M->getMemoryVT(), M->getMemOperand());
+  }
+
+  case Intrinsic::amdgcn_raw_tbuffer_store: {
+    SDValue VData = Op.getOperand(2);
+    bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
+    if (IsD16)
+      VData = handleD16VData(VData, DAG);
+    auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
+    SDValue Ops[] = {
+      Chain,
+      VData,             // vdata
+      Op.getOperand(3),  // rsrc
+      DAG.getConstant(0, DL, MVT::i32), // vindex
+      Offsets.first,     // voffset
+      Op.getOperand(5),  // soffset
+      Offsets.second,    // offset
+      Op.getOperand(6),  // format
+      Op.getOperand(7),  // cachepolicy
+      DAG.getConstant(0, DL, MVT::i1), // idxen
     };
     unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
                            AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -5490,6 +5603,50 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   }
 }
 
+// The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
+// (the offset that is included in bounds checking and swizzling, to be split
+// between the instruction's voffset and immoffset fields) and soffset (the
+// offset that is excluded from bounds checking and swizzling, to go in the
+// instruction's soffset field). This function takes the first kind of offset
+// and figures out how to split it between voffset and immoffset.
+std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
+    SDValue Offset, SelectionDAG &DAG) const {
+  SDLoc DL(Offset);
+  const unsigned MaxImm = 4095;
+  SDValue N0 = Offset;
+  ConstantSDNode *C1 = nullptr;
+  if (N0.getOpcode() == ISD::ADD) {
+    if ((C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))))
+      N0 = N0.getOperand(0);
+  } else if ((C1 = dyn_cast<ConstantSDNode>(N0)))
+    N0 = SDValue();
+
+  if (C1) {
+    unsigned ImmOffset = C1->getZExtValue();
+    // If the immediate value is too big for the immoffset field, put the value
+    // mod 4096 into the immoffset field so that the value that is copied/added
+    // for the voffset field is a multiple of 4096, and it stands more chance
+    // of being CSEd with the copy/add for another similar load/store.
+    unsigned Overflow = ImmOffset & ~MaxImm;
+    ImmOffset -= Overflow;
+    C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
+    if (Overflow) {
+      auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
+      if (!N0)
+        N0 = OverflowVal;
+      else {
+        SDValue Ops[] = { N0, OverflowVal };
+        N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
+      }
+    }
+  }
+  if (!N0)
+    N0 = DAG.getConstant(0, DL, MVT::i32);
+  if (!C1)
+    C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
+  return {N0, SDValue(C1, 0)};
+}
+
 static SDValue getLoadExtOrTrunc(SelectionDAG &DAG, ISD::LoadExtType ExtType,
                                  SDValue Op, const SDLoc &SL, EVT VT) {
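
A note on the operand packing used throughout the hunks above: the legacy intrinsics' four separate dfmt/nfmt/glc/slc operands collapse into two packed operands, format (dfmt in bits 0-3, nfmt from bit 4 up) and cachepolicy (glc in bit 0, slc in bit 1). A minimal standalone sketch of that packing in plain C++ (not part of the patch; the helper names are invented for illustration):

    #include <cstdint>

    // format = Dfmt | (Nfmt << 4): dfmt occupies bits 0-3, nfmt bits 4+.
    uint32_t packFormat(uint32_t Dfmt, uint32_t Nfmt) {
      return Dfmt | (Nfmt << 4);
    }

    // cachepolicy = Glc | (Slc << 1): glc is bit 0, slc is bit 1.
    uint32_t packCachePolicy(uint32_t Glc, uint32_t Slc) {
      return Glc | (Slc << 1);
    }

For example, packFormat(14, 7) == 14 | (7 << 4) == 126; on this hardware 14 and 7 are the 32_32_32_32 dfmt and FLOAT nfmt encodings.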
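The legacy load/store paths also infer the new idxen operand from vindex instead of taking it from the caller: indexing stays enabled unless vindex is a compile-time constant zero, while the raw variants hardwire idxen (and vindex) to 0 and the struct variants hardwire idxen to 1. A sketch of that inference in isolation (std::optional stands in for the dyn_cast<ConstantSDNode> check; not part of the patch):

    #include <cstdint>
    #include <optional>

    // Mirrors the IdxEn computation: nullopt means vindex is not a
    // compile-time constant, so indexing must be assumed necessary.
    bool inferIdxEn(std::optional<uint64_t> VIndex) {
      if (VIndex)
        return *VIndex != 0;  // constant vindex: enable only if nonzero
      return true;            // unknown vindex: keep idxen set
    }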
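Finally, splitBufferOffsets (added at the end of the diff) splits the bounds-checked offset between the instruction's voffset and its 12-bit immoffset field. When the constant part exceeds 4095, only offset mod 4096 stays in immoffset and the 4096-aligned overflow moves to voffset, so nearby accesses that differ only in their low offset bits share one voffset computation and can be CSEd. The integer arithmetic in isolation (a sketch, not the patch's SelectionDAG code):

    #include <cstdint>
    #include <utility>

    // Split a constant byte offset as splitBufferOffsets does: the low
    // 12 bits go to the immoffset field (max 4095), and the 4096-aligned
    // remainder is folded into voffset.
    std::pair<uint32_t, uint32_t> splitImmOffset(uint32_t Offset) {
      const uint32_t MaxImm = 4095;
      uint32_t Overflow = Offset & ~MaxImm;    // multiple of 4096 -> voffset
      uint32_t ImmOffset = Offset - Overflow;  // 0..4095 -> immoffset
      return {Overflow, ImmOffset};
    }

    // Example: splitImmOffset(5000) yields {4096, 904}; two accesses at
    // offsets 5000 and 5004 then share the voffset value 4096.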