diff options
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 6 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-masked-memops.ll | 13 |
2 files changed, 19 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b8396eee844..f6ed6900a6a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -876,6 +876,11 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
     return true;
   }
 
+  // The SSE2 version is too weird (eg, unaligned but non-temporal) to do
+  // anything else at this level.
+  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
+    return false;
+
   auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
   if (!ConstMask)
     return false;
@@ -1674,6 +1679,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return I;
     break;
 
+  case Intrinsic::x86_sse2_maskmov_dqu:
   case Intrinsic::x86_avx_maskstore_ps:
   case Intrinsic::x86_avx_maskstore_pd:
   case Intrinsic::x86_avx_maskstore_ps_256:
diff --git a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/x86-masked-memops.ll
index 64c503440d5..736af173ad4 100644
--- a/llvm/test/Transforms/InstCombine/x86-masked-memops.ll
+++ b/llvm/test/Transforms/InstCombine/x86-masked-memops.ll
@@ -267,6 +267,17 @@ define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
 ; CHECK-NEXT: ret void
 }
 
+; The original SSE2 masked store variant.
+
+define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
+  tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
+  ret void
+
+; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
+; CHECK-NEXT: ret void
+}
+
+
 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
@@ -287,3 +298,5 @@ declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
 declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
 declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
 
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
+