-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  |  31
-rw-r--r--  llvm/test/CodeGen/X86/avx512-mask-op.ll   |  16
2 files changed, 30 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c0ad7e125f..64577422d15 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37660,9 +37660,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
  MVT OpVT = N->getSimpleValueType(0);

-  // Early out for mask vectors.
-  if (OpVT.getVectorElementType() == MVT::i1)
-    return SDValue();
+  bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1;

  SDLoc dl(N);
  SDValue Vec = N->getOperand(0);
@@ -37674,23 +37672,40 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    // Inserting zeros into zeros is a nop.
    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
-      return Vec;
+      return getZeroVector(OpVT, Subtarget, DAG, dl);

    // If we're inserting into a zero vector and then into a larger zero vector,
    // just insert into the larger zero vector directly.
    if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
        ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
      unsigned Idx2Val = SubVec.getConstantOperandVal(2);
-      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec,
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVec.getOperand(1),
                         DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
    }

+    // If we're inserting into a zero vector and our input was extracted from
+    // an insert into a zero vector of the same type, and the extraction was at
+    // least as large as the original insertion, just insert the original
+    // subvector into a zero vector.
+    if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
+        SubVec.getConstantOperandVal(1) == 0 &&
+        SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
+      SDValue Ins = SubVec.getOperand(0);
+      if (Ins.getConstantOperandVal(2) == 0 &&
+          ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
+          Ins.getOperand(1).getValueSizeInBits() <= SubVecVT.getSizeInBits())
+        return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
+                           getZeroVector(OpVT, Subtarget, DAG, dl),
+                           Ins.getOperand(1), N->getOperand(2));
+    }
+
    // If we're inserting a bitcast into zeros, rewrite the insert and move the
    // bitcast to the other side. This helps with detecting zero extending
    // during isel.
    // TODO: Is this useful for other indices than 0?
-    if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
+    if (!IsI1Vector && SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) {
      MVT CastVT = SubVec.getOperand(0).getSimpleValueType();
      unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits();
      MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems);
@@ -37701,6 +37716,10 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
    }
  }

+  // Stop here if this is an i1 vector.
+  if (IsI1Vector)
+    return SDValue();
+
  // If this is an insert of an extract, combine to a shuffle. Don't do this
  // if the insert or extract can be represented with a subregister operation.
  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
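To make the new extract-of-insert-into-zeros fold concrete, here is a minimal IR sketch of the kind of mask-widening pattern it targets. This is a hypothetical example written for illustration; the function name, types, and shuffle mask below are not taken from the test file. A narrow i1 compare result is widened into a larger mask by shuffling in zero bits, and during AVX-512 legalization that widening can show up as an insert of the small mask into a zero vector, an extract, and a re-insert into another zero vector of the same type, which the combine above can now collapse into a single insert into zeros.

; Hypothetical illustration, not the mask_widening test body from the file below.
define <16 x i32> @widen_mask_blend(<4 x i32> %a, <4 x i32> %b, <16 x i32> %x, <16 x i32> %y) {
entry:
  ; <4 x i1> compare result.
  %cmp = icmp eq <4 x i32> %a, %b
  ; Widen the 4-element mask to 16 elements; lanes 4..15 take i1 zeros.
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer,
      <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                  i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ; Use the widened mask as a blend/select condition.
  %sel = select <16 x i1> %wide, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %sel
}

With the fold in place, the redundant round trip through a second zero vector no longer survives into the mask-register code, which is what the updated CHECK lines in the test diff below demonstrate.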
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 44e0a74db2e..88e0b8cdd02 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -2757,16 +2757,13 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $12, %k0, %k0
-; KNL-NEXT: kshiftrw $12, %k0, %k0
-; KNL-NEXT: kshiftlw $8, %k0, %k0
-; KNL-NEXT: kshiftrw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: mask_widening:
; SKX: ## %bb.0: ## %entry
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; SKX-NEXT: kmovb %k0, %k1
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; SKX-NEXT: retq
;
@@ -2776,9 +2773,7 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $12, %k0, %k0
-; AVX512BW-NEXT: kshiftlw $8, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $8, %k0, %k1
+; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
; AVX512BW-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
@@ -2787,9 +2782,8 @@ define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i
; AVX512DQ-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k0
-; AVX512DQ-NEXT: kshiftrb $4, %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, %k1
+; AVX512DQ-NEXT: kshiftlw $12, %k0, %k0
+; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1
; AVX512DQ-NEXT: vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
entry:
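Taken together, the new CHECK lines show the intended codegen effect: with SKX features the compare now writes %k1 directly and the extra kmovb is gone; on KNL and AVX512BW the kshiftlw $8/kshiftrw $8 pair that re-cleared already-zero upper mask bits is dropped; and AVX512DQ replaces a kshiftlb/kshiftrb/kmovb sequence with a single kshiftlw/kshiftrw pair. To reproduce one of these configurations locally, an invocation along these lines should work (the test's actual RUN lines are not shown in this diff, so the triple and CPU here are assumptions inferred from the check prefixes):

  llc -mtriple=x86_64-apple-darwin -mcpu=skx llvm/test/CodeGen/X86/avx512-mask-op.ll -o -

CHECK lines of this style are typically regenerated with llvm/utils/update_llc_test_checks.py rather than edited by hand.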