diff options
author | Tim Northover <tnorthover@apple.com> | 2019-08-27 10:21:11 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2019-08-27 10:21:11 +0000 |
commit | a7f226f9dba3472173a1ea261a3f79114caf936c (patch) | |
tree | 690de0e570c428653e538fea3d83b063185487f6 | |
parent | bccbd74c625507f52ed4666e0ce138bd99c618ab (diff) | |
download | bcm5719-llvm-a7f226f9dba3472173a1ea261a3f79114caf936c.tar.gz bcm5719-llvm-a7f226f9dba3472173a1ea261a3f79114caf936c.zip |
AArch64: avoid creating cycle in DAG for post-increment NEON ops.
Inserting a value into Visited has the effect of terminating a search for
predecessors if that node is seen. This is legitimate for the base address, and
acts as a slight performance optimization, but the vector-building node can be
paert of a legitimate cycle so we shouldn't stop searching there.
PR43056.
llvm-svn: 370036
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll | 19 |
2 files changed, 20 insertions, 1 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index be248ee898c..6d2f363858e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N, // are predecessors to each other or the Vector. SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 16> Worklist; - Visited.insert(N); + Visited.insert(Addr.getNode()); Worklist.push_back(User); Worklist.push_back(LD); Worklist.push_back(Vector.getNode()); diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 8b6a4cae7ed..f3ac9b21f53 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -6319,3 +6319,22 @@ define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* store <8 x i8> %sub, <8 x i8>* %p ret void } + +define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) { +; CHECK-LABEL: test_inc_cycle: +; CHECK: ld1.s { v0 }[0], [x0]{{$}} + + %elt = load i32, i32* %in + %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0 + + ; %inc cannot be %elt directly because we check that the load is only + ; used by the insert before trying to form post-inc. + %inc.vec = bitcast <4 x i32> %newvec to <2 x i64> + %inc = extractelement <2 x i64> %inc.vec, i32 0 + %newaddr = getelementptr i32, i32* %in, i64 %inc + store i32* %newaddr, i32** @var + + ret <4 x i32> %newvec +} + +@var = global i32* null |