 llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp  |  2 +-
 llvm/test/CodeGen/X86/masked_gather_scatter.ll | 37 ++++++++++++++++++++
 llvm/test/CodeGen/X86/masked_store.ll          | 28 +++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index e2ee9f28f3b..7776dffb4e9 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -488,7 +488,7 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
   // Shorten the way if the mask is a vector of constants.
   if (isConstantIntVector(Mask)) {
     for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
-      if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+      if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
         continue;
       Value *OneElt =
           Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 8ee23d6feff..2d6b19b334e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2964,3 +2964,40 @@ define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %f
   %res3 = fadd <16 x float> %res2, %res
   ret <16 x float>%res3
 }
+
+define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
+; KNL_64-LABEL: zero_mask:
+; KNL_64:       # %bb.0:
+; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_64-NEXT:    kxorw %k0, %k0, %k1
+; KNL_64-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_64-NEXT:    vzeroupper
+; KNL_64-NEXT:    retq
+;
+; KNL_32-LABEL: zero_mask:
+; KNL_32:       # %bb.0:
+; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; KNL_32-NEXT:    vpsllq $32, %xmm1, %xmm1
+; KNL_32-NEXT:    vpsraq $32, %zmm1, %zmm1
+; KNL_32-NEXT:    kxorw %k0, %k0, %k1
+; KNL_32-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
+; KNL_32-NEXT:    vzeroupper
+; KNL_32-NEXT:    retl
+;
+; SKX-LABEL: zero_mask:
+; SKX:       # %bb.0:
+; SKX-NEXT:    kxorw %k0, %k0, %k1
+; SKX-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX-NEXT:    retq
+;
+; SKX_32-LABEL: zero_mask:
+; SKX_32:       # %bb.0:
+; SKX_32-NEXT:    vpsllq $32, %xmm1, %xmm1
+; SKX_32-NEXT:    vpsraq $32, %xmm1, %xmm1
+; SKX_32-NEXT:    kxorw %k0, %k0, %k1
+; SKX_32-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
+; SKX_32-NEXT:    retl
+  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index efbb1ef8cc6..180197ccbcf 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -5505,6 +5505,34 @@ define void @widen_masked_store(<3 x i32> %v, <3 x i32>* %p, <3 x i1> %mask) {
   ret void
 }
 
+define void @zero_mask(<2 x double>* %addr, <2 x double> %val) {
+; SSE-LABEL: zero_mask:
+; SSE:       ## %bb.0:
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: zero_mask:
+; AVX1OR2:       ## %bb.0:
+; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX1OR2-NEXT:    vmaskmovpd %xmm0, %xmm1, (%rdi)
+; AVX1OR2-NEXT:    retq
+;
+; AVX512F-LABEL: zero_mask:
+; AVX512F:       ## %bb.0:
+; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    kxorw %k0, %k0, %k1
+; AVX512F-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: zero_mask:
+; AVX512VL:       ## %bb.0:
+; AVX512VL-NEXT:    kxorw %k0, %k0, %k1
+; AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) {%k1}
+; AVX512VL-NEXT:    retq
+  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer)
+  ret void
+}
+
 declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
 declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
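Why the one-character-class fix matters: an all-zero <2 x i1> mask like the zeroinitializer in the new tests is canonicalized by LLVM to a ConstantAggregateZero, not a ConstantVector, so the old cast<ConstantVector> would fail on exactly the masks these tests add. Constant::getAggregateElement is declared on the Constant base class, so the weaker cast handles ConstantVector, ConstantDataVector, and ConstantAggregateZero alike. The following is a minimal standalone sketch of that distinction, not part of the patch; it assumes the LLVM C++ API of this patch's era (e.g. the two-argument VectorType::get), and the variable names are illustrative:

// zero_mask_sketch.cpp -- shows why cast<Constant> is the safe choice
// when scalarizing a constant mask.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // The <2 x i1> zeroinitializer mask used by the new zero_mask tests.
  Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx), 2);
  Constant *ZeroMask = Constant::getNullValue(MaskTy);

  // Its canonical form is ConstantAggregateZero, not ConstantVector,
  // so cast<ConstantVector>(ZeroMask) would be an incompatible-type cast.
  assert(isa<ConstantAggregateZero>(ZeroMask));
  assert(!isa<ConstantVector>(ZeroMask));

  // getAggregateElement lives on Constant, so the patched loop works for
  // every constant-vector representation. Here every lane is null, so the
  // scalarizer takes the 'continue' path for all lanes and emits nothing.
  for (unsigned Idx = 0; Idx < 2; ++Idx)
    assert(cast<Constant>(ZeroMask)->getAggregateElement(Idx)->isNullValue());
  return 0;
}

In other words, before this change ScalarizeMaskedMemIntrin could hit a bad cast whenever it scalarized a masked scatter with an all-zero constant mask; the CHECK lines above record how the X86 targets lower these zero-mask intrinsics once the pass no longer crashes on them.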

