summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@hotmail.com> 2018-08-07 19:21:05 +0000
committer Alexey Bataev <a.bataev@hotmail.com> 2018-08-07 19:21:05 +0000
commit 0edcd0278d05ab6a5196d0d2a03f943b11218bac (patch)
tree 5a89bd59ebb84804a6aebf4ed6e3c39ef872c84c
parent f4f5b7eea37926ba8bf9de31c9e9b5aa1e9c746c (diff)
download bcm5719-llvm-0edcd0278d05ab6a5196d0d2a03f943b11218bac.tar.gz
bcm5719-llvm-0edcd0278d05ab6a5196d0d2a03f943b11218bac.zip
[SLP] Fix insert point for reused extract instructions.
Summary:
Reworked the previously committed patch to insert shuffles for reused extract element instructions in the correct position. The previous logic was incorrect and could lead to a crash with PHIs and EH instructions.

Reviewers: efriedma, javed.absar

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D50143

llvm-svn: 339166
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 8
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll 95
2 files changed, 96 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5c2efe885e2..32df6d58157 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3109,14 +3109,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
if (NeedToShuffleReuses) {
// TODO: Merge this shuffle with the ReorderShuffleMask.
- if (!E->ReorderIndices.empty())
+ if (E->ReorderIndices.empty())
Builder.SetInsertPoint(VL0);
- else if (auto *I = dyn_cast<Instruction>(V))
- Builder.SetInsertPoint(I->getParent(),
- std::next(I->getIterator()));
- else
- Builder.SetInsertPoint(&F->getEntryBlock(),
- F->getEntryBlock().getFirstInsertionPt());
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
E->ReuseShuffleIndices, "shuffle");
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
index 1ab4a13260e..1a981a32804 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -27,3 +27,98 @@ define void @f1(<2 x i16> %x, i16* %a) {
store i16 %t2, i16* %ptr3
ret void
}
+
+define void @f2(<2 x i16> %x, i16* %a) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: cont:
+; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %cont
+
+cont: ; preds = %entry, %cont
+ %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
+ %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
+ %t2 = extractelement <2 x i16> %xx, i32 0
+ %t3 = extractelement <2 x i16> %xx, i32 1
+ %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+ %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+ %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+ %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+ store i16 %t2, i16* %a
+ store i16 %t2, i16* %ptr0
+ store i16 %t3, i16* %ptr1
+ store i16 %t3, i16* %ptr2
+ store i16 %t2, i16* %ptr3
+ %a_val = load i16, i16* %a, align 2
+ %cmp = icmp eq i16 %a_val, 0
+ br i1 %cmp, label %cont, label %exit
+
+exit: ; preds = %cont
+ ret void
+}
+
+define void @f3(<2 x i16> %x, i16* %a) {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: cont:
+; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
+; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT: store i16 [[TMP0]], i16* [[A]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT: store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[A_VAL:%.*]] = load i16, i16* [[A]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A_VAL]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[CONT]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %cont
+
+cont: ; preds = %entry, %cont
+ %xx = phi <2 x i16> [ %x, %entry ], [ undef, %cont ]
+ %aa = phi i16* [ %a, %entry ], [ undef, %cont ]
+ %t2 = extractelement <2 x i16> %xx, i32 0
+ %t3 = extractelement <2 x i16> %xx, i32 1
+ %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+ %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+ %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+ %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+ store i16 %t3, i16* %a
+ store i16 %t3, i16* %ptr0
+ store i16 %t2, i16* %ptr1
+ store i16 %t2, i16* %ptr2
+ store i16 %t3, i16* %ptr3
+ %a_val = load i16, i16* %a, align 2
+ %cmp = icmp eq i16 %a_val, 0
+ br i1 %cmp, label %cont, label %exit
+
+exit: ; preds = %cont
+ ret void
+}
OpenPOWER on IntegriCloud