diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-08 20:46:15 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-08 20:46:15 +0000 |
commit | 26e76ef0e2cf358809d2b41e657074fc21133d59 (patch) | |
tree | 69c6fc55e8c3494b30a3a91d7e83f29707339e16 /llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | |
parent | 43f098e719d022a935dd9244bcf936bd7d3d021f (diff) | |
download | bcm5719-llvm-26e76ef0e2cf358809d2b41e657074fc21133d59.tar.gz bcm5719-llvm-26e76ef0e2cf358809d2b41e657074fc21133d59.zip |
DAG: Don't try to cluster loads with tied inputs
This avoids breaking possible value dependencies when sorting loads by
offset.
AMDGPU has some load instructions that write into the high or low bits
of the destination register, and have a tied input supplying the other
bits. These can easily have the same base pointer, but form a swizzle, so
the high address load needs to come first. Clustering was inserting glue
forcing the opposite ordering, producing a cycle the InstrEmitter
would assert on. It may be expensive to look for the
dependency between the other loads, so just skip any load where this could
happen.
Fixes bug 40936 by reverting r351379, which added a hacky attempt to
fix this by adding chains in this case, which I think was just working
around broken glue before the InstrEmitter. The core of the patch is
re-implementing the fix for that problem.
llvm-svn: 355728
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 21 |
1 files changed, 20 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 559265a3ac8..93c5c34b67a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -205,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (!Chain) return; + // Skip any load instruction that has a tied input. There may be an additional + // dependency requiring a different order than by increasing offsets, and the + // added glue may introduce a cycle. + auto hasTiedInput = [this](const SDNode *N) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned I = 0; I != MCID.getNumOperands(); ++I) { + if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1) + return true; + } + + return false; + }; + // Look for other loads of the same chain. Find loads that are loading from // the same base pointer and different offsets. SmallPtrSet<SDNode*, 16> Visited; @@ -212,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. bool Cluster = false; SDNode *Base = Node; + + if (hasTiedInput(Base)) + return; + // This algorithm requires a reasonably low use count before finding a match // to avoid uselessly blowing up compile time in large blocks. unsigned UseCount = 0; @@ -222,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { continue; int64_t Offset1, Offset2; if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) + Offset1 == Offset2 || + hasTiedInput(User)) { // FIXME: Should be ok if they addresses are identical. But earlier // optimizations really should have eliminated one of the loads. continue; + } if (O2SMap.insert(std::make_pair(Offset1, Base)).second) Offsets.push_back(Offset1); O2SMap.insert(std::make_pair(Offset2, User)); |