2 files changed, 77 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index a3c600300b4..8015eda1605 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1051,6 +1051,13 @@ static void getUnderlyingObjects(MachineInstr *MI,
   if (!MM->getValue())
     return;
   GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
+  for (Value *V : Objs) {
+    if (!isIdentifiedObject(V)) {
+      Objs.clear();
+      return;
+    }
+    Objs.push_back(V);
+  }
 }
 
 /// Add a chain edge between a load and store if the store can be an
@@ -1059,6 +1066,8 @@ static void getUnderlyingObjects(MachineInstr *MI,
 /// but that code doesn't create loop carried dependences.
 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
   MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+  Value *UnknownValue =
+    UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
   for (auto &SU : SUnits) {
     MachineInstr &MI = *SU.getInstr();
     if (isDependenceBarrier(MI, AA))
@@ -1066,6 +1075,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
     else if (MI.mayLoad()) {
       SmallVector<Value *, 4> Objs;
       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+      if (Objs.empty())
+        Objs.push_back(UnknownValue);
       for (auto V : Objs) {
         SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
         SUs.push_back(&SU);
@@ -1073,6 +1084,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
     } else if (MI.mayStore()) {
       SmallVector<Value *, 4> Objs;
       getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+      if (Objs.empty())
+        Objs.push_back(UnknownValue);
       for (auto V : Objs) {
         MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
             PendingLoads.find(V);
@@ -1087,20 +1100,16 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
           // offset, then mark the dependence as loop carried potentially.
           unsigned BaseReg1, BaseReg2;
           int64_t Offset1, Offset2;
-          if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
-              !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;            
-          }
-          if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
-            assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
-                   "What happened to the chain edge?");
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;
+          if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
+              TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
+            if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+              assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+                     "What happened to the chain edge?");
+              SDep Dep(Load, SDep::Barrier);
+              Dep.setLatency(1);
+              SU.addPred(Dep);
+              continue;
+            }
           }
           // Second, the more expensive check that uses alias analysis on the
           // base registers. If they alias, and the load offset is less than
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
new file mode 100644
index 00000000000..3f8abf0bc57
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the pipeliner schedules a store before the load in which there is a
+; loop carried dependence. Previously, the loop carried dependence wasn't added
+; and the load from iteration n was scheduled prior to the store from iteration
+; n-1.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: memh({{.*}}) =
+; CHECK: = memuh({{.*}})
+; CHECK: endloop0
+
+%s.0 = type { i16, i16 }
+
+; Function Attrs: nounwind
+define void @f0() local_unnamed_addr #0 {
+b0:
+  br label %b1
+
+b1:                                               ; preds = %b1, %b0
+  %v0 = phi i32 [ 0, %b0 ], [ %v22, %b1 ]
+  %v1 = load %s.0*, %s.0** undef, align 4
+  %v2 = getelementptr inbounds %s.0, %s.0* %v1, i32 0, i32 0
+  %v3 = load i16, i16* %v2, align 2
+  %v4 = add i16 0, %v3
+  %v5 = add i16 %v4, 0
+  %v6 = add i16 %v5, 0
+  %v7 = add i16 %v6, 0
+  %v8 = add i16 %v7, 0
+  %v9 = add i16 %v8, 0
+  %v10 = add i16 %v9, 0
+  %v11 = add i16 %v10, 0
+  %v12 = add i16 %v11, 0
+  %v13 = add i16 %v12, 0
+  %v14 = add i16 %v13, 0
+  %v15 = add i16 %v14, 0
+  %v16 = add i16 %v15, 0
+  %v17 = add i16 %v16, 0
+  %v18 = add i16 %v17, 0
+  %v19 = add i16 %v18, 0
+  %v20 = load %s.0*, %s.0** undef, align 4
+  store i16 %v19, i16* undef, align 2
+  %v21 = getelementptr inbounds %s.0, %s.0* %v20, i32 0, i32 1
+  store i16 0, i16* %v21, align 2
+  %v22 = add nuw nsw i32 %v0, 1
+  %v23 = icmp eq i32 %v22, 6
+  br i1 %v23, label %b2, label %b1
+
+b2:                                               ; preds = %b1
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }