summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp35
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/reduction.ll4
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll2
3 files changed, 28 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fc4f63e4a5b..df40afd88cc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6686,6 +6686,18 @@ public:
assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
}
+
+ // The compare instruction of a min/max is the insertion point for new
+ // instructions and may be replaced with a new compare instruction.
+ auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
+ assert(isa<SelectInst>(RdxRootInst) &&
+ "Expected min/max reduction to have select root instruction");
+ Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
+ assert(isa<Instruction>(ScalarCond) &&
+ "Expected min/max reduction to have compare condition");
+ return cast<Instruction>(ScalarCond);
+ };
+
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
@@ -6741,15 +6753,6 @@ public:
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
- auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
- assert(isa<SelectInst>(RdxRootInst) &&
- "Expected min/max reduction to have select root instruction");
- Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
- assert(isa<Instruction>(ScalarCond) &&
- "Expected min/max reduction to have compare condition");
- return cast<Instruction>(ScalarCond);
- };
-
// Emit a reduction. For min/max, the root is a select, but the insertion
// point is the compare condition of that select.
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
@@ -6793,8 +6796,20 @@ public:
VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I);
}
}
- // Update users.
+
+ // Update users. For a min/max reduction that ends with a compare and
+ // select, we also have to RAUW for the compare instruction feeding the
+ // reduction root. That's because the original compare may have extra uses
+ // besides the final select of the reduction.
+ if (ReductionData.isMinMax()) {
+ if (auto *VecSelect = dyn_cast<SelectInst>(VectorizedTree)) {
+ Instruction *ScalarCmp =
+ getCmpForMinMaxReduction(cast<Instruction>(ReductionRoot));
+ ScalarCmp->replaceAllUsesWith(VecSelect->getCondition());
+ }
+ }
ReductionRoot->replaceAllUsesWith(VectorizedTree);
+
// Mark all scalar reduction ops for deletion, they are replaced by the
// vector reductions.
V.eraseInstructions(IgnoreList);
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index 3a82ee5fa45..0b7e9ef6c17 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -91,7 +91,7 @@ define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) {
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]]
; CHECK-NEXT: [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]]
; CHECK-NEXT: [[T17:%.*]] = select i1 [[C012345]], i32 [[TMP5]], i32 [[T5]]
-; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 undef, i32 3, i32 4
+; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 [[TMP4]], i32 3, i32 4
; CHECK-NEXT: store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8
; CHECK-NEXT: ret i32 [[T17]]
;
@@ -137,7 +137,7 @@ define i1 @bad_insertpoint_rdx([8 x i32]* %p) #0 {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32
+; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 [[TMP4]] to i32
; CHECK-NEXT: [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[OP_EXTRA]]
; CHECK-NEXT: ret i1 [[CMP23_2]]
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
index a8752bcff3e..468821130e7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -71,7 +71,7 @@ define void @n() local_unnamed_addr #0 {
; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP46]], [[OP_EXTRA]]
-; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], undef
+; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[TMP44]]
; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP46]], i32 [[OP_EXTRA]]
; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_2_1]], 0
OpenPOWER on IntegriCloud