diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2019-11-19 12:24:55 -0500 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2019-11-19 14:57:35 -0500 |
| commit | 0a8e7ca402eb3470eb5faf4982581771cb849130 (patch) | |
| tree | 35faaf67616672c4138ea7c2af9e5ef36c022bf0 | |
| parent | a84b48d01e3e77b754a6d4d7326e4a0c4faa10ab (diff) | |
| download | bcm5719-llvm-0a8e7ca402eb3470eb5faf4982581771cb849130.tar.gz bcm5719-llvm-0a8e7ca402eb3470eb5faf4982581771cb849130.zip | |
[SLP] fix miscompile on min/max reductions with extra uses (PR43948) (2nd try)
The 1st attempt was reverted because it revealed an existing
bug where we could produce invalid IR (use of value before
definition). That should be fixed with:
rG39de82ecc9c2
The bug manifests as replacing a reduction operand with an undef
value.
The problem appears to be limited to cases where a min/max reduction
has extra uses of the compare operand to the select.
In the general case, we are tracking "ExternallyUsedValues" and
an "IgnoreList" of the reduction operations, but those may not apply
to the final compare+select in a min/max reduction.
For that, we use replaceAllUsesWith (RAUW) to ensure that the new
vectorized reduction values are transferred to all subsequent users.
Differential Revision: https://reviews.llvm.org/D70148
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 35 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/reduction.ll | 4 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll | 2 |
3 files changed, 28 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fc4f63e4a5b..df40afd88cc 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6686,6 +6686,18 @@ public: assert(Pair.first && "DebugLoc must be set."); ExternallyUsedValues[Pair.second].push_back(Pair.first); } + + // The compare instruction of a min/max is the insertion point for new + // instructions and may be replaced with a new compare instruction. + auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) { + assert(isa<SelectInst>(RdxRootInst) && + "Expected min/max reduction to have select root instruction"); + Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition(); + assert(isa<Instruction>(ScalarCond) && + "Expected min/max reduction to have compare condition"); + return cast<Instruction>(ScalarCond); + }; + // The reduction root is used as the insertion point for new instructions, // so set it as externally used to prevent it from being deleted. ExternallyUsedValues[ReductionRoot]; @@ -6741,15 +6753,6 @@ public: DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc(); Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues); - auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) { - assert(isa<SelectInst>(RdxRootInst) && - "Expected min/max reduction to have select root instruction"); - Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition(); - assert(isa<Instruction>(ScalarCond) && - "Expected min/max reduction to have compare condition"); - return cast<Instruction>(ScalarCond); - }; - // Emit a reduction. For min/max, the root is a select, but the insertion // point is the compare condition of that select. Instruction *RdxRootInst = cast<Instruction>(ReductionRoot); @@ -6793,8 +6796,20 @@ public: VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I); } } - // Update users. + + // Update users. For a min/max reduction that ends with a compare and + // select, we also have to RAUW for the compare instruction feeding the + // reduction root. That's because the original compare may have extra uses + // besides the final select of the reduction. + if (ReductionData.isMinMax()) { + if (auto *VecSelect = dyn_cast<SelectInst>(VectorizedTree)) { + Instruction *ScalarCmp = + getCmpForMinMaxReduction(cast<Instruction>(ReductionRoot)); + ScalarCmp->replaceAllUsesWith(VecSelect->getCondition()); + } + } ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the // vector reductions. V.eraseInstructions(IgnoreList); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll index 3a82ee5fa45..0b7e9ef6c17 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll @@ -91,7 +91,7 @@ define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) { ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]] ; CHECK-NEXT: [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]] ; CHECK-NEXT: [[T17:%.*]] = select i1 [[C012345]], i32 [[TMP5]], i32 [[T5]] -; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 undef, i32 3, i32 4 +; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 [[TMP4]], i32 3, i32 4 ; CHECK-NEXT: store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8 ; CHECK-NEXT: ret i32 [[T17]] ; @@ -137,7 +137,7 @@ define i1 @bad_insertpoint_rdx([8 x i32]* %p) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[RDX_MINMAX_SELECT]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 -; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 undef to i32 +; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 [[TMP4]] to i32 ; CHECK-NEXT: [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[OP_EXTRA]] ; CHECK-NEXT: ret i1 [[CMP23_2]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll index a8752bcff3e..468821130e7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll @@ -71,7 +71,7 @@ define void @n() local_unnamed_addr #0 { ; CHECK-NEXT: [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]] ; CHECK-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_1_1]], i32 [[SUB_1_1]] ; CHECK-NEXT: [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP46]], [[OP_EXTRA]] -; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], undef +; CHECK-NEXT: [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[TMP44]] ; CHECK-NEXT: [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP46]], i32 [[OP_EXTRA]] ; CHECK-NEXT: [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]] ; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i32 [[SUB_2_1]], 0 |

