summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Northover <tnorthover@apple.com>2019-08-27 10:21:11 +0000
committerTim Northover <tnorthover@apple.com>2019-08-27 10:21:11 +0000
commita7f226f9dba3472173a1ea261a3f79114caf936c (patch)
tree690de0e570c428653e538fea3d83b063185487f6
parentbccbd74c625507f52ed4666e0ce138bd99c618ab (diff)
downloadbcm5719-llvm-a7f226f9dba3472173a1ea261a3f79114caf936c.tar.gz
bcm5719-llvm-a7f226f9dba3472173a1ea261a3f79114caf936c.zip
AArch64: avoid creating cycle in DAG for post-increment NEON ops.
Inserting a value into Visited has the effect of terminating a search for predecessors if that node is seen. This is legitimate for the base address, and acts as a slight performance optimization, but the vector-building node can be part of a legitimate cycle, so we shouldn't stop searching there. PR43056. llvm-svn: 370036
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp2
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll19
2 files changed, 20 insertions, 1 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index be248ee898c..6d2f363858e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N,
// are predecessors to each other or the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
- Visited.insert(N);
+ Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 8b6a4cae7ed..f3ac9b21f53 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -6319,3 +6319,22 @@ define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8*
store <8 x i8> %sub, <8 x i8>* %p
ret void
}
+
+define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) {
+; CHECK-LABEL: test_inc_cycle:
+; CHECK: ld1.s { v0 }[0], [x0]{{$}}
+
+ %elt = load i32, i32* %in
+ %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
+
+ ; %inc cannot be %elt directly because we check that the load is only
+ ; used by the insert before trying to form post-inc.
+ %inc.vec = bitcast <4 x i32> %newvec to <2 x i64>
+ %inc = extractelement <2 x i64> %inc.vec, i32 0
+ %newaddr = getelementptr i32, i32* %in, i64 %inc
+ store i32* %newaddr, i32** @var
+
+ ret <4 x i32> %newvec
+}
+
+@var = global i32* null
OpenPOWER on IntegriCloud