summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp45
2 files changed, 20 insertions, 46 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 559265a3ac8..93c5c34b67a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -205,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (!Chain)
return;
+ // Skip any load instruction that has a tied input. There may be an additional
+ // dependency requiring a different order than by increasing offsets, and the
+ // added glue may introduce a cycle.
+ auto hasTiedInput = [this](const SDNode *N) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+ if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+ return true;
+ }
+
+ return false;
+ };
+
// Look for other loads of the same chain. Find loads that are loading from
// the same base pointer and different offsets.
SmallPtrSet<SDNode*, 16> Visited;
@@ -212,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
bool Cluster = false;
SDNode *Base = Node;
+
+ if (hasTiedInput(Base))
+ return;
+
// This algorithm requires a reasonably low use count before finding a match
// to avoid uselessly blowing up compile time in large blocks.
unsigned UseCount = 0;
@@ -222,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
continue;
int64_t Offset1, Offset2;
if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
- Offset1 == Offset2)
+ Offset1 == Offset2 ||
+ hasTiedInput(User)) {
// FIXME: Should be ok if they addresses are identical. But earlier
// optimizations really should have eliminated one of the loads.
continue;
+ }
if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
Offsets.push_back(Offset1);
O2SMap.insert(std::make_pair(Offset2, User));
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3873c46b44e..69e05c311d2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9367,51 +9367,6 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
- case AMDGPU::FLAT_LOAD_UBYTE_D16_HI:
- case AMDGPU::FLAT_LOAD_SBYTE_D16_HI:
- case AMDGPU::FLAT_LOAD_SHORT_D16_HI:
- case AMDGPU::GLOBAL_LOAD_UBYTE_D16_HI:
- case AMDGPU::GLOBAL_LOAD_SBYTE_D16_HI:
- case AMDGPU::GLOBAL_LOAD_SHORT_D16_HI:
- case AMDGPU::DS_READ_U16_D16_HI:
- case AMDGPU::DS_READ_I8_D16_HI:
- case AMDGPU::DS_READ_U8_D16_HI:
- case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
- case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
- case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
- case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
- case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
- case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN: {
- // For these loads that write to the HI part of a register,
- // we should chain them to the op that writes to the LO part
- // of the register to maintain the order.
- unsigned NumOps = Node->getNumOperands();
- SDValue OldChain = Node->getOperand(NumOps-1);
-
- if (OldChain.getValueType() != MVT::Other)
- break;
-
- // Look for the chain to replace to.
- SDValue Lo = Node->getOperand(NumOps-2);
- SDNode *LoNode = Lo.getNode();
- if (LoNode->getNumValues() == 1 ||
- LoNode->getValueType(LoNode->getNumValues() - 1) != MVT::Other)
- break;
-
- SDValue NewChain = Lo.getValue(LoNode->getNumValues() - 1);
- if (NewChain == OldChain) // Already replaced.
- break;
-
- SmallVector<SDValue, 16> Ops;
- for (unsigned I = 0; I < NumOps-1; ++I)
- Ops.push_back(Node->getOperand(I));
- // Repalce the Chain.
- Ops.push_back(NewChain);
- MachineSDNode *NewNode = DAG.getMachineNode(Opcode, SDLoc(Node),
- Node->getVTList(), Ops);
- DAG.setNodeMemRefs(NewNode, Node->memoperands());
- return NewNode;
- }
default:
break;
}
OpenPOWER on IntegriCloud