diff options
| author | Tom Stellard <tstellar@redhat.com> | 2019-10-08 17:04:51 +0000 |
|---|---|---|
| committer | Tom Stellard <tstellar@redhat.com> | 2019-10-08 17:04:51 +0000 |
| commit | 3a8d80944b7766449e2c8784a8fb30d19a2ba16c (patch) | |
| tree | ce032ddc23d43a6459876ecd58c0ca378a60f7b3 /llvm/lib | |
| parent | fb8218f2525ae7b03a383b066929878d265d5ef3 (diff) | |
| download | bcm5719-llvm-3a8d80944b7766449e2c8784a8fb30d19a2ba16c.tar.gz bcm5719-llvm-3a8d80944b7766449e2c8784a8fb30d19a2ba16c.zip | |
AMDGPU: Add offsets to MMO when lowering buffer intrinsics
Summary:
Without offsets on the MachineMemOperands (MMOs),
MachineInstr::mayAlias() will return true for all reads and writes to the
same resource descriptor. This leads to O(N^2) complexity in the MachineScheduler
when analyzing dependencies of buffer loads and stores. It also limits
the SILoadStoreOptimizer from merging more instructions.
This patch reduces the compile time of one pathological compute shader
from 12 seconds to 1 second.
Reviewers: arsenm, nhaehnle
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65097
llvm-svn: 374087
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 78 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 6 |
2 files changed, 73 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1883b28f657..1afbdb96d32 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6092,6 +6092,30 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } } +// This function computes an appropriate offset to pass to +// MachineMemOperand::setOffset() based on the offset inputs to +// an intrinsic. If any of the offsets are non-contstant or +// if VIndex is non-zero then this function returns 0. Otherwise, +// it returns the sum of VOffset, SOffset, and Offset. +static unsigned getBufferOffsetForMMO(SDValue VOffset, + SDValue SOffset, + SDValue Offset, + SDValue VIndex = SDValue()) { + + if (!isa<ConstantSDNode>(VOffset) || !isa<ConstantSDNode>(SOffset) || + !isa<ConstantSDNode>(Offset)) + return 0; + + if (VIndex) { + if (!isa<ConstantSDNode>(VIndex) || !cast<ConstantSDNode>(VIndex)->isNullValue()) + return 0; + } + + return cast<ConstantSDNode>(VOffset)->getSExtValue() + + cast<ConstantSDNode>(SOffset)->getSExtValue() + + cast<ConstantSDNode>(Offset)->getSExtValue(); +} + SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); @@ -6238,13 +6262,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); + unsigned Offset = setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; + unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; EVT VT = Op.getValueType(); EVT IntVT = VT.changeTypeToInteger(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(Offset); EVT LoadVT = Op.getValueType(); if (LoadVT.getScalarType() == MVT::f16) @@ -6275,7 +6304,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getTargetConstant(0, DL, MVT::i1), // idxen }; - return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops); + auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5])); + return lowerIntrinsicLoad(M, IsFormat, DAG, Ops); } case Intrinsic::amdgcn_struct_buffer_load: case Intrinsic::amdgcn_struct_buffer_load_format: { @@ -6293,6 +6324,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getTargetConstant(1, DL, MVT::i1), // idxen }; + auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5], + Ops[2])); return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops); } case Intrinsic::amdgcn_tbuffer_load: { @@ -6398,10 +6432,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(Offset); unsigned Opcode = 0; switch (IntrID) { @@ -6469,6 +6507,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, EVT VT = Op.getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); unsigned Opcode = 0; switch (IntrID) { @@ -6542,6 +6581,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, EVT VT = Op.getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], + Ops[3])); unsigned Opcode = 0; switch (IntrID) { @@ -6605,9 +6646,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); + unsigned Offset = setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(Offset); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6628,6 +6673,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, }; EVT VT = Op.getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7])); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6648,6 +6694,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, }; EVT VT = Op.getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7], + Ops[4])); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -6889,11 +6937,15 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(Offset); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); @@ -6938,6 +6990,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32) @@ -6982,6 +7035,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], + Ops[3])); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); @@ -7008,10 +7063,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + // We don't know the offset if vindex is non-zero, so clear it. + if (IdxEn) + Offset = 0; EVT VT = Op.getOperand(2).getValueType(); auto *M = cast<MemSDNode>(Op); + M->getMemOperand()->setOffset(Offset); unsigned Opcode = VT.isVector() ? AMDGPUISD::BUFFER_ATOMIC_PK_FADD : AMDGPUISD::BUFFER_ATOMIC_FADD; @@ -7105,7 +7164,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets( // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. -void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, +unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, SDValue *Offsets, unsigned Align) const { SDLoc DL(CombinedOffset); @@ -7116,7 +7175,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, Offsets[0] = DAG.getConstant(0, DL, MVT::i32); Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); - return; + return SOffset + ImmOffset; } } if (DAG.isBaseWithConstantOffset(CombinedOffset)) { @@ -7129,12 +7188,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, Offsets[0] = N0; Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); - return; + return 0; } } Offsets[0] = CombinedOffset; Offsets[1] = DAG.getConstant(0, DL, MVT::i32); Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32); + return 0; } // Handle 8 bit and 16 bit buffer loads diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 1257457b5d6..8198420c42f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -203,8 +203,10 @@ private: // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. - void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, - SDValue *Offsets, unsigned Align = 4) const; + /// \returns 0 If there is a non-constant offset or if the offset is 0. + /// Otherwise returns the constant offset. + unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, + SDValue *Offsets, unsigned Align = 4) const; // Handle 8 bit and 16 bit buffer loads SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL, |

