summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp20
-rw-r--r--llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll20
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll8
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll2
4 files changed, 18 insertions, 32 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9c6cc503ef0..c6d26abc76a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4250,14 +4250,6 @@ class HorizontalReduction {
MapVector<Instruction *, Value *> ExtraArgs;
BinaryOperator *ReductionRoot = nullptr;
- // After successfull horizontal reduction vectorization attempt for PHI node
- // vectorizer tries to update root binary op by combining vectorized tree and
- // the ReductionPHI node. But during vectorization this ReductionPHI can be
- // vectorized itself and replaced by the undef value, while the instruction
- // itself is marked for deletion. This 'marked for deletion' PHI node then can
- // be used in new binary operation, causing "Use still stuck around after Def
- // is destroyed" crash upon PHI node deletion.
- WeakVH ReductionPHI;
/// The opcode of the reduction.
Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd;
@@ -4318,7 +4310,6 @@ public:
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
ReductionRoot = B;
- ReductionPHI = Phi;
// We currently only support adds.
if ((ReductionOpcode != Instruction::Add &&
@@ -4406,9 +4397,9 @@ public:
Stack.push_back(std::make_pair(I, 0));
continue;
}
- // NextV is an extra argument for TreeN (its parent operation).
- markExtraArg(Stack.back(), NextV);
}
+ // NextV is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), NextV);
}
return true;
}
@@ -4497,12 +4488,7 @@ public:
}
}
// Update users.
- if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
- assert(ReductionRoot && "Need a reduction operation");
- ReductionRoot->setOperand(0, VectorizedTree);
- ReductionRoot->setOperand(1, ReductionPHI);
- } else
- ReductionRoot->replaceAllUsesWith(VectorizedTree);
+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
}
return VectorizedTree != nullptr;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 4085e099b90..b7fa5452f25 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -9,7 +9,7 @@ target triple = "aarch64--linux-gnu"
@a = common global [80 x i8] zeroinitializer, align 16
; DEFAULT-LABEL: @PR28330(
-; DEFAULT: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
+; DEFAULT: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
; DEFAULT: %[[S0:.+]] = select <8 x i1> %1, <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT: %[[R0:.+]] = shufflevector <8 x i32> %[[S0]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R1:.+]] = add <8 x i32> %[[S0]], %[[R0]]
@@ -18,10 +18,10 @@ target triple = "aarch64--linux-gnu"
; DEFAULT: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
; DEFAULT: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
-; DEFAULT: %tmp34 = add i32 %[[R6]], %tmp17
+; DEFAULT: %bin.extra = add i32 %[[R6]], %tmp17
;
; GATHER-LABEL: @PR28330(
-; GATHER: %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ]
+; GATHER: %tmp17 = phi i32 [ %bin.extra, %for.body ], [ 0, %entry ]
; GATHER: %tmp19 = select i1 %tmp1, i32 -720, i32 -80
; GATHER: %tmp21 = select i1 %tmp3, i32 -720, i32 -80
; GATHER: %tmp23 = select i1 %tmp5, i32 -720, i32 -80
@@ -45,7 +45,7 @@ target triple = "aarch64--linux-gnu"
; GATHER: %[[R4:.+]] = shufflevector <8 x i32> %[[R3]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]]
; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0
-; GATHER: %tmp34 = add i32 %[[R6]], %tmp17
+; GATHER: %bin.extra = add i32 %[[R6]], %tmp17
;
; MAX-COST-LABEL: @PR28330(
; MAX-COST-NOT: shufflevector
@@ -98,7 +98,7 @@ define void @PR32038(i32 %n) {
; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
; DEFAULT: for.body:
-; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef
; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef
@@ -114,8 +114,8 @@ define void @PR32038(i32 %n) {
; DEFAULT-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; DEFAULT-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; DEFAULT-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; DEFAULT-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP3]], -5
-; DEFAULT-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]]
+; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5
+; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef
; DEFAULT-NEXT: br label [[FOR_BODY]]
;
; GATHER-LABEL: @PR32038(
@@ -138,7 +138,7 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
; GATHER-NEXT: br label [[FOR_BODY:%.*]]
; GATHER: for.body:
-; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; GATHER-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80
; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP19]]
; GATHER-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80
@@ -169,8 +169,8 @@ define void @PR32038(i32 %n) {
; GATHER-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; GATHER-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; GATHER-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; GATHER-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP8]], -5
-; GATHER-NEXT: [[TMP34]] = add i32 [[BIN_EXTRA]], [[TMP17]]
+; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP8]], -5
+; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
; GATHER-NEXT: br label [[FOR_BODY]]
;
; MAX-COST-LABEL: @PR32038(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
index 1ffbe0d87bf..8370c52e2ea 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -14,7 +14,7 @@ define i32 @test(i32* nocapture readonly %p) {
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* %p, i64 7
; CHECK-NEXT: br label %for.body
; CHECK: for.body:
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %add.7, %for.body ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %bin.extra, %for.body ]
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %p to <8 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, [[TMP1]]
@@ -32,10 +32,10 @@ define i32 @test(i32* nocapture readonly %p) {
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[ADD_7:%.*]] = add i32 [[TMP4]], [[SUM]]
-; CHECK-NEXT: br i1 true, label %for.end, label %for.body
+; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP4]], [[SUM]]
+; CHECK: br i1 true, label %for.end, label %for.body
; CHECK: for.end:
-; CHECK-NEXT: ret i32 [[ADD_7]]
+; CHECK-NEXT: ret i32 [[BIN_EXTRA]]
;
entry:
%arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
index 5377ee82cf9..a01646b9dbe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -12,7 +12,7 @@ define i32 @foo(i32* nocapture readonly %diff) #0 {
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
-; CHECK-NEXT: [[ADD52:%.*]] = add nsw i32 [[TMP15]],
+; CHECK: [[ADD52:%.*]] = add i32 [[TMP15]],
; CHECK: ret i32 [[ADD52]]
;
entry:
OpenPOWER on IntegriCloud