diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 103 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 64 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr34605.ll | 6 |
3 files changed, 77 insertions, 96 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5bde801bbf7..bcffa72afea 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8257,9 +8257,9 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, SelectionDAG & DAG) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); - unsigned NumOfOperands = Op.getNumOperands(); + unsigned NumOperands = Op.getNumOperands(); - assert(isPowerOf2_32(NumOfOperands) && + assert(NumOperands > 1 && isPowerOf2_32(NumOperands) && "Unexpected number of operands in CONCAT_VECTORS"); // If this node promotes - by concatenating zeroes - the type of the result @@ -8273,73 +8273,58 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, ZeroC); } - SDValue Undef = DAG.getUNDEF(ResVT); - if (NumOfOperands > 2) { - // Specialize the cases when all, or all but one, of the operands are undef. - unsigned NumOfDefinedOps = 0; - unsigned OpIdx = 0; - for (unsigned i = 0; i < NumOfOperands; i++) - if (!Op.getOperand(i).isUndef()) { - NumOfDefinedOps++; - OpIdx = i; - } - if (NumOfDefinedOps == 0) - return Undef; - if (NumOfDefinedOps == 1) { - unsigned SubVecNumElts = - Op.getOperand(OpIdx).getValueType().getVectorNumElements(); - SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, - Op.getOperand(OpIdx), IdxVal); + unsigned NumZero = 0; + unsigned NumNonZero = 0; + uint64_t NonZeros = 0; + for (unsigned i = 0; i != NumOperands; ++i) { + SDValue SubVec = Op.getOperand(i); + if (SubVec.isUndef()) + continue; + if (ISD::isBuildVectorAllZeros(SubVec.getNode())) + ++NumZero; + else { + assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. + NonZeros |= (uint64_t)1 << i; + ++NumNonZero; } + } + + + // If there are zero or one non-zeros we can handle this very simply. + if (NumNonZero <= 1) { + SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) + : DAG.getUNDEF(ResVT); + if (!NumNonZero) + return Vec; + unsigned Idx = countTrailingZeros(NonZeros); + SDValue SubVec = Op.getOperand(Idx); + unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, + DAG.getIntPtrConstant(Idx * SubVecNumElts, dl)); + } + if (NumOperands > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); - SmallVector<SDValue, 2> Ops; - for (unsigned i = 0; i < NumOfOperands/2; i++) - Ops.push_back(Op.getOperand(i)); - SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); - Ops.clear(); - for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++) - Ops.push_back(Op.getOperand(i)); - SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops); + ArrayRef<SDUse> Ops = Op->ops(); + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, + Ops.slice(0, NumOperands/2)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, + Ops.slice(NumOperands/2)); return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } - // 2 operands - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - unsigned NumElems = ResVT.getVectorNumElements(); - assert(V1.getValueType() == V2.getValueType() && - V1.getValueType().getVectorNumElements() == NumElems/2 && - "Unexpected operands in CONCAT_VECTORS"); - - // If this can be done with a subreg insert do that first. - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - if (V2.isUndef()) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); + assert(NumNonZero == 2 && "Simple cases not handled?"); - if (ResVT.getSizeInBits() >= 16) + if (ResVT.getVectorNumElements() >= 16) return Op; // The operation is legal with KUNPCK - bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); - bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); - SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl); - if (IsZeroV1 && IsZeroV2) - return ZeroVec; - - if (IsZeroV2) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); - - SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); - if (V1.isUndef()) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); - - if (IsZeroV1) - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); - - V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal); + SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, + DAG.getUNDEF(ResVT), Op.getOperand(0), + DAG.getIntPtrConstant(0, dl)); + unsigned NumElems = ResVT.getVectorNumElements(); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), + DAG.getIntPtrConstant(NumElems/2, dl)); } static SDValue LowerCONCAT_VECTORS(SDValue Op, diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index bca1a091d39..00935257baa 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -5715,33 +5715,31 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_q_128: ; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k5 ## encoding: [0xc5,0xf8,0x92,0xef] -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k5} ## encoding: [0x62,0xf2,0xfd,0x0d,0x29,0xc1] -; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k2 {%k5} ## encoding: [0x62,0xf2,0xf5,0x0d,0x37,0xd0] -; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1f,0xc9,0x02] -; CHECK-NEXT: kxorw %k0, %k0, %k3 ## encoding: [0xc5,0xfc,0x47,0xd8] -; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1f,0xe1,0x04] -; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k5} ## encoding: [0x62,0xf3,0xf5,0x0d,0x1f,0xf0,0x02] -; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k5} ## encoding: [0x62,0xf2,0xfd,0x0d,0x37,0xf9] -; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e] -; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e] -; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c] -; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] +; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd8] +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x02] +; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04] +; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x1f,0xf0,0x02] +; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf9] +; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e] +; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) @@ -5812,33 +5810,31 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_q_128: ; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k5 ## encoding: [0xc5,0xf8,0x92,0xef] -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k5} ## encoding: [0x62,0xf2,0xfd,0x0d,0x29,0xc1] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k2 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xd1,0x01] -; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k1 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xc9,0x02] -; CHECK-NEXT: kxorw %k0, %k0, %k3 ## encoding: [0xc5,0xfc,0x47,0xd8] -; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1f,0xe1,0x04] -; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xf1,0x05] -; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k5} ## encoding: [0x62,0xf3,0xfd,0x0d,0x1e,0xf9,0x06] -; CHECK-NEXT: kshiftlw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0e] -; CHECK-NEXT: kshiftrw $14, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0e] -; CHECK-NEXT: kshiftlw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x32,0xed,0x0c] -; CHECK-NEXT: kshiftrw $12, %k5, %k5 ## encoding: [0xc4,0xe3,0xf9,0x30,0xed,0x0c] -; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x01] +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x02] +; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0] +; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04] +; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05] +; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06] +; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e] +; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e] +; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] ; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] ; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] +; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] ; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1] ; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask) diff --git a/llvm/test/CodeGen/X86/pr34605.ll b/llvm/test/CodeGen/X86/pr34605.ll index 19fed5db5bc..2209db4e544 100644 --- a/llvm/test/CodeGen/X86/pr34605.ll +++ b/llvm/test/CodeGen/X86/pr34605.ll @@ -13,10 +13,10 @@ define void @pr34605(i8* nocapture %s, i32 %p) { ; CHECK-NEXT: vpcmpeqd {{\.LCPI.*}}, %zmm0, %k2 ; CHECK-NEXT: kunpckwd %k1, %k2, %k1 ; CHECK-NEXT: kunpckdq %k0, %k1, %k0 -; CHECK-NEXT: kxord %k0, %k0, %k1 ; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: kmovd %ecx, %k2 -; CHECK-NEXT: kunpckdq %k2, %k1, %k1 +; CHECK-NEXT: kmovd %ecx, %k1 +; CHECK-NEXT: kshiftlq $32, %k1, %k1 +; CHECK-NEXT: kshiftrq $32, %k1, %k1 ; CHECK-NEXT: kandq %k1, %k0, %k1 ; CHECK-NEXT: vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z} ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 |