summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp37
-rw-r--r--llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll54
2 files changed, 77 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index a3c600300b4..8015eda1605 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1051,6 +1051,13 @@ static void getUnderlyingObjects(MachineInstr *MI,
if (!MM->getValue())
return;
GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
+ for (Value *V : Objs) {
+ if (!isIdentifiedObject(V)) {
+ Objs.clear();
+ return;
+ }
+ Objs.push_back(V);
+ }
}
/// Add a chain edge between a load and store if the store can be an
@@ -1059,6 +1066,8 @@ static void getUnderlyingObjects(MachineInstr *MI,
/// but that code doesn't create loop carried dependences.
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ Value *UnknownValue =
+ UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
for (auto &SU : SUnits) {
MachineInstr &MI = *SU.getInstr();
if (isDependenceBarrier(MI, AA))
@@ -1066,6 +1075,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
else if (MI.mayLoad()) {
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
for (auto V : Objs) {
SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
SUs.push_back(&SU);
@@ -1073,6 +1084,8 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
} else if (MI.mayStore()) {
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ if (Objs.empty())
+ Objs.push_back(UnknownValue);
for (auto V : Objs) {
MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
PendingLoads.find(V);
@@ -1087,20 +1100,16 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// offset, then mark the dependence as loop carried potentially.
unsigned BaseReg1, BaseReg2;
int64_t Offset1, Offset2;
- if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
- !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
- }
- if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
- "What happened to the chain edge?");
- SDep Dep(Load, SDep::Barrier);
- Dep.setLatency(1);
- SU.addPred(Dep);
- continue;
+ if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
+ TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
+ if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ "What happened to the chain edge?");
+ SDep Dep(Load, SDep::Barrier);
+ Dep.setLatency(1);
+ SU.addPred(Dep);
+ continue;
+ }
}
// Second, the more expensive check that uses alias analysis on the
// base registers. If they alias, and the load offset is less than
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
new file mode 100644
index 00000000000..3f8abf0bc57
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the pipeliner schedules the store before the load when there is a
+; loop-carried dependence between them. Previously, the loop-carried dependence
+; wasn't added, and the load from iteration n was scheduled prior to the store
+; from iteration n-1.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: memh({{.*}}) =
+; CHECK: = memuh({{.*}})
+; CHECK: endloop0
+
+%s.0 = type { i16, i16 }
+
+; Function Attrs: nounwind
+define void @f0() local_unnamed_addr #0 {
+b0:
+ br label %b1
+
+b1: ; preds = %b1, %b0
+ %v0 = phi i32 [ 0, %b0 ], [ %v22, %b1 ]
+ %v1 = load %s.0*, %s.0** undef, align 4
+ %v2 = getelementptr inbounds %s.0, %s.0* %v1, i32 0, i32 0
+ %v3 = load i16, i16* %v2, align 2
+ %v4 = add i16 0, %v3
+ %v5 = add i16 %v4, 0
+ %v6 = add i16 %v5, 0
+ %v7 = add i16 %v6, 0
+ %v8 = add i16 %v7, 0
+ %v9 = add i16 %v8, 0
+ %v10 = add i16 %v9, 0
+ %v11 = add i16 %v10, 0
+ %v12 = add i16 %v11, 0
+ %v13 = add i16 %v12, 0
+ %v14 = add i16 %v13, 0
+ %v15 = add i16 %v14, 0
+ %v16 = add i16 %v15, 0
+ %v17 = add i16 %v16, 0
+ %v18 = add i16 %v17, 0
+ %v19 = add i16 %v18, 0
+ %v20 = load %s.0*, %s.0** undef, align 4
+ store i16 %v19, i16* undef, align 2
+ %v21 = getelementptr inbounds %s.0, %s.0* %v20, i32 0, i32 1
+ store i16 0, i16* %v21, align 2
+ %v22 = add nuw nsw i32 %v0, 1
+ %v23 = icmp eq i32 %v22, 6
+ br i1 %v23, label %b2, label %b1
+
+b2: ; preds = %b1
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
OpenPOWER on IntegriCloud