diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 45 |
2 files changed, 20 insertions, 46 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 559265a3ac8..93c5c34b67a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -205,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (!Chain) return; + // Skip any load instruction that has a tied input. There may be an additional + // dependency requiring a different order than by increasing offsets, and the + // added glue may introduce a cycle. + auto hasTiedInput = [this](const SDNode *N) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned I = 0; I != MCID.getNumOperands(); ++I) { + if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1) + return true; + } + + return false; + }; + // Look for other loads of the same chain. Find loads that are loading from // the same base pointer and different offsets. SmallPtrSet<SDNode*, 16> Visited; @@ -212,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. bool Cluster = false; SDNode *Base = Node; + + if (hasTiedInput(Base)) + return; + // This algorithm requires a reasonably low use count before finding a match // to avoid uselessly blowing up compile time in large blocks. unsigned UseCount = 0; @@ -222,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { continue; int64_t Offset1, Offset2; if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) + Offset1 == Offset2 || + hasTiedInput(User)) { // FIXME: Should be ok if they addresses are identical. But earlier // optimizations really should have eliminated one of the loads. continue; + } if (O2SMap.insert(std::make_pair(Offset1, Base)).second) Offsets.push_back(Offset1); O2SMap.insert(std::make_pair(Offset2, User)); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3873c46b44e..69e05c311d2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9367,51 +9367,6 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, Ops.push_back(ImpDef.getValue(1)); return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } - case AMDGPU::FLAT_LOAD_UBYTE_D16_HI: - case AMDGPU::FLAT_LOAD_SBYTE_D16_HI: - case AMDGPU::FLAT_LOAD_SHORT_D16_HI: - case AMDGPU::GLOBAL_LOAD_UBYTE_D16_HI: - case AMDGPU::GLOBAL_LOAD_SBYTE_D16_HI: - case AMDGPU::GLOBAL_LOAD_SHORT_D16_HI: - case AMDGPU::DS_READ_U16_D16_HI: - case AMDGPU::DS_READ_I8_D16_HI: - case AMDGPU::DS_READ_U8_D16_HI: - case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET: - case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET: - case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET: - case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN: - case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN: - case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN: { - // For these loads that write to the HI part of a register, - // we should chain them to the op that writes to the LO part - // of the register to maintain the order. - unsigned NumOps = Node->getNumOperands(); - SDValue OldChain = Node->getOperand(NumOps-1); - - if (OldChain.getValueType() != MVT::Other) - break; - - // Look for the chain to replace to. - SDValue Lo = Node->getOperand(NumOps-2); - SDNode *LoNode = Lo.getNode(); - if (LoNode->getNumValues() == 1 || - LoNode->getValueType(LoNode->getNumValues() - 1) != MVT::Other) - break; - - SDValue NewChain = Lo.getValue(LoNode->getNumValues() - 1); - if (NewChain == OldChain) // Already replaced. - break; - - SmallVector<SDValue, 16> Ops; - for (unsigned I = 0; I < NumOps-1; ++I) - Ops.push_back(Node->getOperand(I)); - // Repalce the Chain. - Ops.push_back(NewChain); - MachineSDNode *NewNode = DAG.getMachineNode(Opcode, SDLoc(Node), - Node->getVTList(), Ops); - DAG.setNodeMemRefs(NewNode, Node->memoperands()); - return NewNode; - } default: break; } |