diff options
-rw-r--r-- | llvm/lib/CodeGen/MachinePipeliner.cpp | 37 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll | 54 |
2 files changed, 77 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index a3c600300b4..8015eda1605 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1051,6 +1051,13 @@ static void getUnderlyingObjects(MachineInstr *MI, if (!MM->getValue()) return; GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL); + for (Value *V : Objs) { + if (!isIdentifiedObject(V)) { + Objs.clear(); + return; + } + Objs.push_back(V); + } } /// Add a chain edge between a load and store if the store can be an @@ -1059,6 +1066,8 @@ static void getUnderlyingObjects(MachineInstr *MI, /// but that code doesn't create loop carried dependences. void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads; + Value *UnknownValue = + UndefValue::get(Type::getVoidTy(MF.getFunction().getContext())); for (auto &SU : SUnits) { MachineInstr &MI = *SU.getInstr(); if (isDependenceBarrier(MI, AA)) @@ -1066,6 +1075,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { else if (MI.mayLoad()) { SmallVector<Value *, 4> Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + if (Objs.empty()) + Objs.push_back(UnknownValue); for (auto V : Objs) { SmallVector<SUnit *, 4> &SUs = PendingLoads[V]; SUs.push_back(&SU); @@ -1073,6 +1084,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { } else if (MI.mayStore()) { SmallVector<Value *, 4> Objs; getUnderlyingObjects(&MI, Objs, MF.getDataLayout()); + if (Objs.empty()) + Objs.push_back(UnknownValue); for (auto V : Objs) { MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I = PendingLoads.find(V); @@ -1087,20 +1100,16 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) { // offset, then mark the dependence as loop carried potentially. unsigned BaseReg1, BaseReg2; int64_t Offset1, Offset2; - if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) || - !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { - SDep Dep(Load, SDep::Barrier); - Dep.setLatency(1); - SU.addPred(Dep); - continue; - } - if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { - assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && - "What happened to the chain edge?"); - SDep Dep(Load, SDep::Barrier); - Dep.setLatency(1); - SU.addPred(Dep); - continue; + if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) && + TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) { + if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) { + assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) && + "What happened to the chain edge?"); + SDep Dep(Load, SDep::Barrier); + Dep.setLatency(1); + SU.addPred(Dep); + continue; + } } // Second, the more expensive check that uses alias analysis on the // base registers. If they alias, and the load offset is less than diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll new file mode 100644 index 00000000000..3f8abf0bc57 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Test that the pipeliner schedules a store before the load in which there is a +; loop carried dependence. Previously, the loop carried dependence wasn't added +; and the load from iteration n was scheduled prior to the store from iteration +; n-1. + +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: memh({{.*}}) = +; CHECK: = memuh({{.*}}) +; CHECK: endloop0 + +%s.0 = type { i16, i16 } + +; Function Attrs: nounwind +define void @f0() local_unnamed_addr #0 { +b0: + br label %b1 + +b1: ; preds = %b1, %b0 + %v0 = phi i32 [ 0, %b0 ], [ %v22, %b1 ] + %v1 = load %s.0*, %s.0** undef, align 4 + %v2 = getelementptr inbounds %s.0, %s.0* %v1, i32 0, i32 0 + %v3 = load i16, i16* %v2, align 2 + %v4 = add i16 0, %v3 + %v5 = add i16 %v4, 0 + %v6 = add i16 %v5, 0 + %v7 = add i16 %v6, 0 + %v8 = add i16 %v7, 0 + %v9 = add i16 %v8, 0 + %v10 = add i16 %v9, 0 + %v11 = add i16 %v10, 0 + %v12 = add i16 %v11, 0 + %v13 = add i16 %v12, 0 + %v14 = add i16 %v13, 0 + %v15 = add i16 %v14, 0 + %v16 = add i16 %v15, 0 + %v17 = add i16 %v16, 0 + %v18 = add i16 %v17, 0 + %v19 = add i16 %v18, 0 + %v20 = load %s.0*, %s.0** undef, align 4 + store i16 %v19, i16* undef, align 2 + %v21 = getelementptr inbounds %s.0, %s.0* %v20, i32 0, i32 1 + store i16 0, i16* %v21, align 2 + %v22 = add nuw nsw i32 %v0, 1 + %v23 = icmp eq i32 %v22, 6 + br i1 %v23, label %b2, label %b1 + +b2: ; preds = %b1 + ret void +} + +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" } |