diff options
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 6 |
| -rw-r--r-- | llvm/test/Transforms/InstCombine/masked_intrinsics.ll | 3 |
2 files changed, 6 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 2dea7eea404..1d52938d1f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1437,7 +1437,11 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     switch (II->getIntrinsicID()) {
     case Intrinsic::masked_gather: // fallthrough
     case Intrinsic::masked_load: {
-      APInt DemandedPtrs(DemandedElts), DemandedPassThrough(DemandedElts);
+      // Subtlety: If we load from a pointer, the pointer must be valid
+      // regardless of whether the element is demanded. Doing otherwise risks
+      // segfaults which didn't exist in the original program.
+      APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
+        DemandedPassThrough(DemandedElts);
       if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
         for (unsigned i = 0; i < VWidth; i++) {
           Constant *CElt = CV->getAggregateElement(i);
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 48c8f78f990..582fd8f5c82 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -56,10 +56,9 @@ define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) {
 ret <2 x double> %res
 }
 
-; FIXME: the output here demonstrates a miscompile!
 define double @load_all(double* %base, double %pt) {
 ; CHECK-LABEL: @load_all(
-; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef>
+; CHECK-NEXT: [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3>
 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
 ; CHECK-NEXT: [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
 ; CHECK-NEXT: ret double [[ELT]]

