diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 31 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 16 |
2 files changed, 30 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3c0ad7e125f..64577422d15 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37660,9 +37660,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, MVT OpVT = N->getSimpleValueType(0); - // Early out for mask vectors. - if (OpVT.getVectorElementType() == MVT::i1) - return SDValue(); + bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1; SDLoc dl(N); SDValue Vec = N->getOperand(0); @@ -37674,23 +37672,40 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (ISD::isBuildVectorAllZeros(Vec.getNode())) { // Inserting zeros into zeros is a nop. if (ISD::isBuildVectorAllZeros(SubVec.getNode())) - return Vec; + return getZeroVector(OpVT, Subtarget, DAG, dl); // If we're inserting into a zero vector and then into a larger zero vector, // just insert into the larger zero vector directly. if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR && ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) { unsigned Idx2Val = SubVec.getConstantOperandVal(2); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, + getZeroVector(OpVT, Subtarget, DAG, dl), SubVec.getOperand(1), DAG.getIntPtrConstant(IdxVal + Idx2Val, dl)); } + // If we're inserting into a zero vector and our input was extracted from an + // insert into a zero vector of the same type and the extraction was at + // least as large as the original insertion, just insert the original + // subvector into a zero vector. 
+ if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 && + SubVec.getConstantOperandVal(1) == 0 && + SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) { + SDValue Ins = SubVec.getOperand(0); + if (Ins.getConstantOperandVal(2) == 0 && + ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) && + Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, + getZeroVector(OpVT, Subtarget, DAG, dl), + Ins.getOperand(1), N->getOperand(2)); + } + // If we're inserting a bitcast into zeros, rewrite the insert and move the // bitcast to the other side. This helps with detecting zero extending // during isel. // TODO: Is this useful for other indices than 0? - if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) { + if (!IsI1Vector && SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) { MVT CastVT = SubVec.getOperand(0).getSimpleValueType(); unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits(); MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems); @@ -37701,6 +37716,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, } } + // Stop here if this is an i1 vector. + if (IsI1Vector) + return SDValue(); + // If this is an insert of an extract, combine to a shuffle. Don't do this // if the insert or extract can be represented with a subregister operation. 
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 44e0a74db2e..88e0b8cdd02 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -2757,16 +2757,13 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i ; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftlw $12, %k0, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k1 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; KNL-NEXT: retq ; ; SKX-LABEL: mask_widening: ; SKX: ## %bb.0: ## %entry -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; SKX-NEXT: kmovb %k0, %k1 +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 ; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; SKX-NEXT: retq ; @@ -2776,9 +2773,7 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $12, %k0, %k0 -; AVX512BW-NEXT: kshiftlw $8, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $8, %k0, %k1 +; AVX512BW-NEXT: kshiftrw $12, %k0, %k1 ; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; AVX512BW-NEXT: retq ; @@ -2787,9 +2782,8 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i ; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0 -; AVX512DQ-NEXT: kmovb %k0, %k1 +; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0 +; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1 ; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1} ; AVX512DQ-NEXT: retq entry: |