summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorTom Stellard <tstellar@redhat.com>2019-10-08 17:04:51 +0000
committerTom Stellard <tstellar@redhat.com>2019-10-08 17:04:51 +0000
commit3a8d80944b7766449e2c8784a8fb30d19a2ba16c (patch)
treece032ddc23d43a6459876ecd58c0ca378a60f7b3 /llvm/lib
parentfb8218f2525ae7b03a383b066929878d265d5ef3 (diff)
downloadbcm5719-llvm-3a8d80944b7766449e2c8784a8fb30d19a2ba16c.tar.gz
bcm5719-llvm-3a8d80944b7766449e2c8784a8fb30d19a2ba16c.zip
AMDGPU: Add offsets to MMO when lowering buffer intrinsics
Summary: Without offsets on the MachineMemOperands (MMOs), MachineInstr::mayAlias() will return true for all reads and writes to the same resource descriptor. This leads to O(N^2) complexity in the MachineScheduler when analyzing dependencies of buffer loads and stores. It also limits the SILoadStoreOptimizer from merging more instructions. This patch reduces the compile time of one pathological compute shader from 12 seconds to 1 second. Reviewers: arsenm, nhaehnle Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65097 llvm-svn: 374087
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp78
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h6
2 files changed, 73 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1883b28f657..1afbdb96d32 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6092,6 +6092,30 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
+// This function computes an appropriate offset to pass to
+// MachineMemOperand::setOffset() based on the offset inputs to
+// an intrinsic. If any of the offsets are non-contstant or
+// if VIndex is non-zero then this function returns 0. Otherwise,
+// it returns the sum of VOffset, SOffset, and Offset.
+static unsigned getBufferOffsetForMMO(SDValue VOffset,
+ SDValue SOffset,
+ SDValue Offset,
+ SDValue VIndex = SDValue()) {
+
+ if (!isa<ConstantSDNode>(VOffset) || !isa<ConstantSDNode>(SOffset) ||
+ !isa<ConstantSDNode>(Offset))
+ return 0;
+
+ if (VIndex) {
+ if (!isa<ConstantSDNode>(VIndex) || !cast<ConstantSDNode>(VIndex)->isNullValue())
+ return 0;
+ }
+
+ return cast<ConstantSDNode>(VOffset)->getSExtValue() +
+ cast<ConstantSDNode>(SOffset)->getSExtValue() +
+ cast<ConstantSDNode>(Offset)->getSExtValue();
+}
+
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6238,13 +6262,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
+
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
EVT LoadVT = Op.getValueType();
if (LoadVT.getScalarType() == MVT::f16)
@@ -6275,7 +6304,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
- return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
+ auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5]));
+ return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format: {
@@ -6293,6 +6324,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
+ auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5],
+ Ops[2]));
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_tbuffer_load: {
@@ -6398,10 +6432,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
unsigned Opcode = 0;
switch (IntrID) {
@@ -6469,6 +6507,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6]));
unsigned Opcode = 0;
switch (IntrID) {
@@ -6542,6 +6581,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6],
+ Ops[3]));
unsigned Opcode = 0;
switch (IntrID) {
@@ -6605,9 +6646,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -6628,6 +6673,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7]));
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -6648,6 +6694,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7],
+ Ops[4]));
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -6889,11 +6937,15 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
@@ -6938,6 +6990,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6]));
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
@@ -6982,6 +7035,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6],
+ Ops[3]));
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
@@ -7008,10 +7063,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
- setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
+ // We don't know the offset if vindex is non-zero, so clear it.
+ if (IdxEn)
+ Offset = 0;
EVT VT = Op.getOperand(2).getValueType();
auto *M = cast<MemSDNode>(Op);
+ M->getMemOperand()->setOffset(Offset);
unsigned Opcode = VT.isVector() ? AMDGPUISD::BUFFER_ATOMIC_PK_FADD
: AMDGPUISD::BUFFER_ATOMIC_FADD;
@@ -7105,7 +7164,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
-void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
+unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SelectionDAG &DAG, SDValue *Offsets,
unsigned Align) const {
SDLoc DL(CombinedOffset);
@@ -7116,7 +7175,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
- return;
+ return SOffset + ImmOffset;
}
}
if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
@@ -7129,12 +7188,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
- return;
+ return 0;
}
}
Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
+ return 0;
}
// Handle 8 bit and 16 bit buffer loads
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1257457b5d6..8198420c42f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -203,8 +203,10 @@ private:
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
- void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
- SDValue *Offsets, unsigned Align = 4) const;
+ /// \returns 0 If there is a non-constant offset or if the offset is 0.
+ /// Otherwise returns the constant offset.
+ unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
+ SDValue *Offsets, unsigned Align = 4) const;
// Handle 8 bit and 16 bit buffer loads
SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
OpenPOWER on IntegriCloud