summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2019-11-19 10:47:07 -0500
committerSanjay Patel <spatel@rotateright.com>2019-11-19 10:50:10 -0500
commit39de82ecc9c2e461e1318ed9926286a1eed2be3f (patch)
tree1aed238e7c5906c68d40fc9fbc5484bbaa9d4b44
parent41bac76da873bf9b4b5d1353299ca68b4ad26ff3 (diff)
downloadbcm5719-llvm-39de82ecc9c2e461e1318ed9926286a1eed2be3f.tar.gz
bcm5719-llvm-39de82ecc9c2e461e1318ed9926286a1eed2be3f.zip
[SLP] fix insertion point for min/max reduction
As discussed in D70148 (and caused a revert of the original commit): if we insert at the select, then we can produce invalid IR because the replacement for the compare may have uses before the select.
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp19
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/reduction.ll2
2 files changed, 18 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 373d7a7ffd1..fc4f63e4a5b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6741,8 +6741,23 @@ public:
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
- // Emit a reduction.
- Builder.SetInsertPoint(cast<Instruction>(ReductionRoot));
+ auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
+ assert(isa<SelectInst>(RdxRootInst) &&
+ "Expected min/max reduction to have select root instruction");
+ Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
+ assert(isa<Instruction>(ScalarCond) &&
+ "Expected min/max reduction to have compare condition");
+ return cast<Instruction>(ScalarCond);
+ };
+
+ // Emit a reduction. For min/max, the root is a select, but the insertion
+ // point is the compare condition of that select.
+ Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
+ if (ReductionData.isMinMax())
+ Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
+ else
+ Builder.SetInsertPoint(RdxRootInst);
+
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (VectorizedTree) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index f6e1d0ad2fe..3a82ee5fa45 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -131,13 +131,13 @@ define i1 @bad_insertpoint_rdx([8 x i32]* %p) #0 {
; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* [[P:%.*]], i64 0, i64 0
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX22]] to <2 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 16
-; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i32> [[TMP2]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i32> [[TMP2]], <2 x i32> [[RDX_SHUF]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0
+; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32
; CHECK-NEXT: [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[OP_EXTRA]]
; CHECK-NEXT: ret i1 [[CMP23_2]]
;
OpenPOWER on IntegriCloud