-rw-r--r--  llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp        | 32
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter.ll       | 37
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll | 42
-rw-r--r--  llvm/test/CodeGen/X86/masked_memop.ll                | 14
4 files changed, 48 insertions, 77 deletions
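
This change drops the redundant `icmp eq i1 <bit>, true` that the scalarization of masked loads, stores, gathers, and scatters used to emit before each per-lane branch; the extracted i1 mask element now feeds the branch condition directly. For orientation, here is a minimal, self-contained sketch of the resulting per-lane guard; the helper name emitLaneGuard and the block name "cond.block" are illustrative only and do not appear in the pass:

    // Sketch only: mirrors the pattern used by scalarizeMaskedLoad/Store/
    // Gather/Scatter after this patch, not the pass code itself.
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Guard the code that will later be emitted into the returned block on
    // lane Idx of the i1 vector Mask.  InsertPt is the first instruction of
    // what becomes the "else" block for the next iteration.
    static BasicBlock *emitLaneGuard(Value *Mask, unsigned Idx,
                                     Instruction *InsertPt) {
      IRBuilder<> Builder(InsertPt);
      BasicBlock *IfBlock = InsertPt->getParent();

      // %MaskIdx = extractelement <N x i1> %Mask, i32 Idx
      Value *Predicate =
          Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));

      // Split twice: CondBlock will hold the guarded load/store, and the
      // second split becomes the "else" block.
      BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.block");
      BasicBlock *ElseBlock = CondBlock->splitBasicBlock(InsertPt, "else");

      // br i1 %MaskIdx, label %cond.block, label %else
      // (previously: %cmp = icmp eq i1 %MaskIdx, true; br i1 %cmp, ...)
      Instruction *OldBr = IfBlock->getTerminator();
      BranchInst::Create(CondBlock, ElseBlock, Predicate, OldBr);
      OldBr->eraseFromParent();
      return CondBlock;
    }

In the pass itself the guarded load or store is then emitted into the returned block and the loop continues filling the new "else" block, so the only functional difference is the removed compare.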
diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 9387722bfeb..30659a4df47 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -181,8 +181,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
     //
     //  %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
-    //  %to_load = icmp eq i1 %mask_1, true
-    //  br i1 %to_load, label %cond.load, label %else
+    //  br i1 %mask_1, label %cond.load, label %else
     //
     if (Idx > 0) {
       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
@@ -194,8 +193,6 @@ static void scalarizeMaskedLoad(CallInst *CI) {
 
     Value *Predicate =
         Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
-    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
-                                    ConstantInt::get(Predicate->getType(), 1));
 
     // Create "cond" block
     //
@@ -216,7 +213,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
     Builder.SetInsertPoint(InsertPt);
     Instruction *OldBr = IfBlock->getTerminator();
-    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
     OldBr->eraseFromParent();
     PrevIfBlock = IfBlock;
     IfBlock = NewIfBlock;
@@ -311,13 +308,10 @@ static void scalarizeMaskedStore(CallInst *CI) {
     // Fill the "else" block, created in the previous iteration
     //
     //  %mask_1 = extractelement <16 x i1> %mask, i32 Idx
-    //  %to_store = icmp eq i1 %mask_1, true
-    //  br i1 %to_store, label %cond.store, label %else
+    //  br i1 %mask_1, label %cond.store, label %else
     //
     Value *Predicate =
         Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
-    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
-                                    ConstantInt::get(Predicate->getType(), 1));
 
     // Create "cond" block
     //
@@ -339,7 +333,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
         CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
     Builder.SetInsertPoint(InsertPt);
     Instruction *OldBr = IfBlock->getTerminator();
-    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
     OldBr->eraseFromParent();
     IfBlock = NewIfBlock;
   }
@@ -430,8 +424,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
     // Fill the "else" block, created in the previous iteration
     //
     //  %Mask1 = extractelement <16 x i1> %Mask, i32 1
-    //  %ToLoad1 = icmp eq i1 %Mask1, true
-    //  br i1 %ToLoad1, label %cond.load, label %else
+    //  br i1 %Mask1, label %cond.load, label %else
     //
     if (Idx > 0) {
       Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
@@ -443,9 +436,6 @@ static void scalarizeMaskedGather(CallInst *CI) {
 
     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
                                                     "Mask" + Twine(Idx));
-    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
-                                    ConstantInt::get(Predicate->getType(), 1),
-                                    "ToLoad" + Twine(Idx));
 
     // Create "cond" block
     //
@@ -467,7 +457,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
     Builder.SetInsertPoint(InsertPt);
     Instruction *OldBr = IfBlock->getTerminator();
-    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
     OldBr->eraseFromParent();
     PrevIfBlock = IfBlock;
     IfBlock = NewIfBlock;
@@ -549,15 +539,11 @@ static void scalarizeMaskedScatter(CallInst *CI) {
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
     //
-    //  % Mask1 = extractelement <16 x i1> % Mask, i32 Idx
-    //  % ToStore = icmp eq i1 % Mask1, true
-    //  br i1 % ToStore, label %cond.store, label %else
+    //  %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+    //  br i1 %Mask1, label %cond.store, label %else
     //
     Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
                                                     "Mask" + Twine(Idx));
-    Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate,
-                                    ConstantInt::get(Predicate->getType(), 1),
-                                    "ToStore" + Twine(Idx));
 
     // Create "cond" block
     //
@@ -578,7 +564,7 @@ static void scalarizeMaskedScatter(CallInst *CI) {
     BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
     Builder.SetInsertPoint(InsertPt);
     Instruction *OldBr = IfBlock->getTerminator();
-    BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
+    BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
     OldBr->eraseFromParent();
     IfBlock = NewIfBlock;
   }
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index bf72c726f7d..e8f0321544d 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -70,8 +70,7 @@ declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> ,
 ; SCALAR:       else:
 ; SCALAR-NEXT:  %res.phi.else = phi
 ; SCALAR-NEXT:  %Mask1 = extractelement <16 x i1> %imask, i32 1
-; SCALAR-NEXT:  %ToLoad1 = icmp eq i1 %Mask1, true
-; SCALAR-NEXT:  br i1 %ToLoad1, label %cond.load1, label %else2
+; SCALAR-NEXT:  br i1 %Mask1, label %cond.load1, label %else2
 
 define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
 ; KNL_64-LABEL: test2:
@@ -213,8 +212,7 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
 
 ; SCALAR-LABEL: test5
 ; SCALAR:       %Mask0 = extractelement <16 x i1> %imask, i32 0
-; SCALAR-NEXT:  %ToStore0 = icmp eq i1 %Mask0, true
-; SCALAR-NEXT:  br i1 %ToStore0, label %cond.store, label %else
+; SCALAR-NEXT:  br i1 %Mask0, label %cond.store, label %else
 ; SCALAR:       cond.store:
 ; SCALAR-NEXT:  %Elt0 = extractelement <16 x i32> %val, i32 0
 ; SCALAR-NEXT:  %Ptr0 = extractelement <16 x i32*> %gep.random, i32 0
@@ -222,8 +220,7 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
 ; SCALAR-NEXT:  br label %else
 ; SCALAR:       else:
 ; SCALAR-NEXT:  %Mask1 = extractelement <16 x i1> %imask, i32 1
-; SCALAR-NEXT:  %ToStore1 = icmp eq i1 %Mask1, true
-; SCALAR-NEXT:  br i1 %ToStore1, label %cond.store1, label %else2
+; SCALAR-NEXT:  br i1 %Mask1, label %cond.store1, label %else2
 
 define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
 ; KNL_64-LABEL: test5:
@@ -2448,45 +2445,41 @@ define void @v1_scatter(<1 x i32>%a1, <1 x i32*> %ptr, <1 x i1> %mask) {
 ; KNL_64-LABEL: v1_scatter:
 ; KNL_64:       # %bb.0:
 ; KNL_64-NEXT:    testb $1, %dl
-; KNL_64-NEXT:    jne .LBB43_1
-; KNL_64-NEXT:  # %bb.2: # %else
-; KNL_64-NEXT:    retq
-; KNL_64-NEXT:  .LBB43_1: # %cond.store
+; KNL_64-NEXT:    je .LBB43_2
+; KNL_64-NEXT:  # %bb.1: # %cond.store
 ; KNL_64-NEXT:    movl %edi, (%rsi)
+; KNL_64-NEXT:  .LBB43_2: # %else
 ; KNL_64-NEXT:    retq
 ;
 ; KNL_32-LABEL: v1_scatter:
 ; KNL_32:       # %bb.0:
 ; KNL_32-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; KNL_32-NEXT:    jne .LBB43_1
-; KNL_32-NEXT:  # %bb.2: # %else
-; KNL_32-NEXT:    retl
-; KNL_32-NEXT:  .LBB43_1: # %cond.store
+; KNL_32-NEXT:    je .LBB43_2
+; KNL_32-NEXT:  # %bb.1: # %cond.store
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; KNL_32-NEXT:    movl %ecx, (%eax)
+; KNL_32-NEXT:  .LBB43_2: # %else
 ; KNL_32-NEXT:    retl
 ;
 ; SKX-LABEL: v1_scatter:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    testb $1, %dl
-; SKX-NEXT:    jne .LBB43_1
-; SKX-NEXT:  # %bb.2: # %else
-; SKX-NEXT:    retq
-; SKX-NEXT:  .LBB43_1: # %cond.store
+; SKX-NEXT:    je .LBB43_2
+; SKX-NEXT:  # %bb.1: # %cond.store
 ; SKX-NEXT:    movl %edi, (%rsi)
+; SKX-NEXT:  .LBB43_2: # %else
 ; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: v1_scatter:
 ; SKX_32:       # %bb.0:
 ; SKX_32-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; SKX_32-NEXT:    jne .LBB43_1
-; SKX_32-NEXT:  # %bb.2: # %else
-; SKX_32-NEXT:    retl
-; SKX_32-NEXT:  .LBB43_1: # %cond.store
+; SKX_32-NEXT:    je .LBB43_2
+; SKX_32-NEXT:  # %bb.1: # %cond.store
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; SKX_32-NEXT:    movl %ecx, (%eax)
+; SKX_32-NEXT:  .LBB43_2: # %else
 ; SKX_32-NEXT:    retl
   call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask)
   ret void
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
index c111dc30441..f018615c24a 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
@@ -131,12 +131,11 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
 ; WIDEN_AVX2-NEXT:  .LBB1_2: # %else
 ; WIDEN_AVX2-NEXT:    vpextrb $8, %xmm2, %eax
 ; WIDEN_AVX2-NEXT:    testb $1, %al
-; WIDEN_AVX2-NEXT:    jne .LBB1_3
-; WIDEN_AVX2-NEXT:  # %bb.4: # %else2
-; WIDEN_AVX2-NEXT:    retq
-; WIDEN_AVX2-NEXT:  .LBB1_3: # %cond.store1
+; WIDEN_AVX2-NEXT:    je .LBB1_4
+; WIDEN_AVX2-NEXT:  # %bb.3: # %cond.store1
 ; WIDEN_AVX2-NEXT:    vpextrq $1, %xmm1, %rax
 ; WIDEN_AVX2-NEXT:    vmovhpd %xmm0, (%rax)
+; WIDEN_AVX2-NEXT:  .LBB1_4: # %else2
 ; WIDEN_AVX2-NEXT:    retq
 ;
 ; PROMOTE_AVX2-LABEL: test_scatter_v2i32_index:
@@ -157,12 +156,11 @@ define void @test_scatter_v2i32_index(<2 x double> %a1, double* %base, <2 x i32>
 ; PROMOTE_AVX2-NEXT:  .LBB1_2: # %else
 ; PROMOTE_AVX2-NEXT:    vpextrb $8, %xmm2, %eax
 ; PROMOTE_AVX2-NEXT:    testb $1, %al
-; PROMOTE_AVX2-NEXT:    jne .LBB1_3
-; PROMOTE_AVX2-NEXT:  # %bb.4: # %else2
-; PROMOTE_AVX2-NEXT:    retq
-; PROMOTE_AVX2-NEXT:  .LBB1_3: # %cond.store1
+; PROMOTE_AVX2-NEXT:    je .LBB1_4
+; PROMOTE_AVX2-NEXT:  # %bb.3: # %cond.store1
 ; PROMOTE_AVX2-NEXT:    vpextrq $1, %xmm1, %rax
 ; PROMOTE_AVX2-NEXT:    vmovhpd %xmm0, (%rax)
+; PROMOTE_AVX2-NEXT:  .LBB1_4: # %else2
 ; PROMOTE_AVX2-NEXT:    retq
   %gep = getelementptr double, double *%base, <2 x i32> %ind
   call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %gep, i32 4, <2 x i1> %mask)
@@ -284,12 +282,11 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
 ; WIDEN_AVX2-NEXT:  .LBB3_2: # %else
 ; WIDEN_AVX2-NEXT:    vpextrb $8, %xmm2, %eax
 ; WIDEN_AVX2-NEXT:    testb $1, %al
-; WIDEN_AVX2-NEXT:    jne .LBB3_3
-; WIDEN_AVX2-NEXT:  # %bb.4: # %else2
-; WIDEN_AVX2-NEXT:    retq
-; WIDEN_AVX2-NEXT:  .LBB3_3: # %cond.store1
+; WIDEN_AVX2-NEXT:    je .LBB3_4
+; WIDEN_AVX2-NEXT:  # %bb.3: # %cond.store1
 ; WIDEN_AVX2-NEXT:    vpextrq $1, %xmm1, %rax
 ; WIDEN_AVX2-NEXT:    vextractps $1, %xmm0, (%rax)
+; WIDEN_AVX2-NEXT:  .LBB3_4: # %else2
 ; WIDEN_AVX2-NEXT:    retq
 ;
 ; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data:
@@ -303,12 +300,11 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mas
 ; PROMOTE_AVX2-NEXT:  .LBB3_2: # %else
 ; PROMOTE_AVX2-NEXT:    vpextrb $8, %xmm2, %eax
 ; PROMOTE_AVX2-NEXT:    testb $1, %al
-; PROMOTE_AVX2-NEXT:    jne .LBB3_3
-; PROMOTE_AVX2-NEXT:  # %bb.4: # %else2
-; PROMOTE_AVX2-NEXT:    retq
-; PROMOTE_AVX2-NEXT:  .LBB3_3: # %cond.store1
+; PROMOTE_AVX2-NEXT:    je .LBB3_4
+; PROMOTE_AVX2-NEXT:  # %bb.3: # %cond.store1
 ; PROMOTE_AVX2-NEXT:    vpextrq $1, %xmm1, %rax
 ; PROMOTE_AVX2-NEXT:    vextractps $2, %xmm0, (%rax)
+; PROMOTE_AVX2-NEXT:  .LBB3_4: # %else2
 ; PROMOTE_AVX2-NEXT:    retq
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
   ret void
@@ -438,12 +434,11 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
 ; WIDEN_AVX2-NEXT:  .LBB5_2: # %else
 ; WIDEN_AVX2-NEXT:    vpextrb $8, %xmm2, %eax
 ; WIDEN_AVX2-NEXT:    testb $1, %al
-; WIDEN_AVX2-NEXT:    jne .LBB5_3
-; WIDEN_AVX2-NEXT:  # %bb.4: # %else2
-; WIDEN_AVX2-NEXT:    retq
-; WIDEN_AVX2-NEXT:  .LBB5_3: # %cond.store1
+; WIDEN_AVX2-NEXT:    je .LBB5_4
+; WIDEN_AVX2-NEXT:  # %bb.3: # %cond.store1
 ; WIDEN_AVX2-NEXT:    vpextrq $1, %xmm1, %rax
 ; WIDEN_AVX2-NEXT:    vextractps $1, %xmm0, (%rax)
+; WIDEN_AVX2-NEXT:  .LBB5_4: # %else2
 ; WIDEN_AVX2-NEXT:    retq
 ;
 ; PROMOTE_AVX2-LABEL: test_scatter_v2i32_data_index:
@@ -464,12 +459,11 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1, i32* %base, <2 x i32>
 ; PROMOTE_AVX2-NEXT:  .LBB5_2: # %else
 ; PROMOTE_AVX2-NEXT:    vpextrb $8, %xmm2, %eax
 ; PROMOTE_AVX2-NEXT:    testb $1, %al
-; PROMOTE_AVX2-NEXT:    jne .LBB5_3
-; PROMOTE_AVX2-NEXT:  # %bb.4: # %else2
-; PROMOTE_AVX2-NEXT:    retq
-; PROMOTE_AVX2-NEXT:  .LBB5_3: # %cond.store1
+; PROMOTE_AVX2-NEXT:    je .LBB5_4
+; PROMOTE_AVX2-NEXT:  # %bb.3: # %cond.store1
 ; PROMOTE_AVX2-NEXT:    vpextrq $1, %xmm1, %rax
 ; PROMOTE_AVX2-NEXT:    vextractps $2, %xmm0, (%rax)
+; PROMOTE_AVX2-NEXT:  .LBB5_4: # %else2
 ; PROMOTE_AVX2-NEXT:    retq
   %gep = getelementptr i32, i32 *%base, <2 x i32> %ind
   call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %gep, i32 4, <2 x i1> %mask)
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 36da9386fb0..50da0ed68a5 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -66,21 +66,19 @@ define void @storev1(<1 x i32> %trigger, <1 x i32>* %addr, <1 x i32> %val) {
 ; AVX-LABEL: storev1:
 ; AVX:       ## %bb.0:
 ; AVX-NEXT:    testl %edi, %edi
-; AVX-NEXT:    je LBB1_1
-; AVX-NEXT:  ## %bb.2: ## %else
-; AVX-NEXT:    retq
-; AVX-NEXT:  LBB1_1: ## %cond.store
+; AVX-NEXT:    jne LBB1_2
+; AVX-NEXT:  ## %bb.1: ## %cond.store
 ; AVX-NEXT:    movl %edx, (%rsi)
+; AVX-NEXT:  LBB1_2: ## %else
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: storev1:
 ; AVX512:       ## %bb.0:
 ; AVX512-NEXT:    testl %edi, %edi
-; AVX512-NEXT:    je LBB1_1
-; AVX512-NEXT:  ## %bb.2: ## %else
-; AVX512-NEXT:    retq
-; AVX512-NEXT:  LBB1_1: ## %cond.store
+; AVX512-NEXT:    jne LBB1_2
+; AVX512-NEXT:  ## %bb.1: ## %cond.store
 ; AVX512-NEXT:    movl %edx, (%rsi)
+; AVX512-NEXT:  LBB1_2: ## %else
 ; AVX512-NEXT:    retq
   %mask = icmp eq <1 x i32> %trigger, zeroinitializer
   call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32>%val, <1 x i32>* %addr, i32 4, <1 x i1>%mask)
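
The test updates in the three .ll files follow mechanically from the pass change: one icmp per lane disappears from the IR-level SCALAR checks, and in the generated assembly the single-lane cases now test the mask bit and fall through to a shared return instead of jumping to an out-of-line cond.store block with its own ret. As a rough C-level picture of the branch shape the updated v1_scatter and storev1 checks encode (the function and parameter names below are illustrative only, not taken from the tests):

    // Illustrative only: the scalarized shape of a one-element masked store,
    // matching the updated KNL_64/SKX checks for @v1_scatter above.
    void scalarized_v1_masked_store(int val, int *ptr, bool mask_bit) {
      if (mask_bit)   // testb $1, %dl ; je .LBB43_2  (skip when the bit is clear)
        *ptr = val;   // %bb.1: cond.store            movl %edi, (%rsi)
                      // .LBB43_2: else               retq
    }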