diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 64 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 113 |
3 files changed, 65 insertions, 150 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index bd4638f147a..2455d3bb3d9 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -267,6 +267,9 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0 + Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0 + Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0 + Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0 Name == "sse.cvtsi2ss" || // Added in 7.0 Name == "sse.cvtsi642ss" || // Added in 7.0 Name == "sse2.cvtsi2sd" || // Added in 7.0 @@ -2599,6 +2602,67 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) : CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); + } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") || + Name.startswith("avx512.mask.vpermt2var.") || + Name.startswith("avx512.maskz.vpermt2var."))) { + bool ZeroMask = Name[11] == 'z'; + bool IndexForm = Name[17] == 'i'; + unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); + unsigned EltWidth = CI->getType()->getScalarSizeInBits(); + bool IsFloat = CI->getType()->isFPOrFPVectorTy(); + Intrinsic::ID IID; + if (VecWidth == 128 && EltWidth == 32 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_ps_128; + else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_d_128; + else if (VecWidth == 128 && EltWidth == 64 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_pd_128; + else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_q_128; + else if (VecWidth == 256 && EltWidth == 32 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_ps_256; + else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_d_256; + else if (VecWidth == 256 && EltWidth == 64 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_pd_256; + else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_q_256; + else if (VecWidth == 512 && EltWidth == 32 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_ps_512; + else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_d_512; + else if (VecWidth == 512 && EltWidth == 64 && IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_pd_512; + else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) + IID = Intrinsic::x86_avx512_vpermi2var_q_512; + else if (VecWidth == 128 && EltWidth == 16) + IID = Intrinsic::x86_avx512_vpermi2var_hi_128; + else if (VecWidth == 256 && EltWidth == 16) + IID = Intrinsic::x86_avx512_vpermi2var_hi_256; + else if (VecWidth == 512 && EltWidth == 16) + IID = Intrinsic::x86_avx512_vpermi2var_hi_512; + else if (VecWidth == 128 && EltWidth == 8) + IID = Intrinsic::x86_avx512_vpermi2var_qi_128; + else if (VecWidth == 256 && EltWidth == 8) + IID = Intrinsic::x86_avx512_vpermi2var_qi_256; + else if (VecWidth == 512 && EltWidth == 8) + IID = Intrinsic::x86_avx512_vpermi2var_qi_512; + else + llvm_unreachable("Unexpected intrinsic"); + + Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), + CI->getArgOperand(2) }; + + // If this isn't index form we need to swap operand 0 and 1. + if (!IndexForm) + std::swap(Args[0], Args[1]); + + Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), + Args); + Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) + : Builder.CreateBitCast(CI->getArgOperand(1), + CI->getType()); + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); } else if (IsX86 && Name.startswith("avx512.mask.") && upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { // Rep will be updated by the call in the condition. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 09bd7bf274a..15eeddf0faa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20515,44 +20515,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // Swap Src1 and Src2 in the node creation return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); } - case VPERM_3OP_MASKZ: - case VPERM_3OP_MASK:{ - // Src2 is the PassThru - SDValue Src1 = Op.getOperand(1); - SDValue Src2 = Op.getOperand(2); - SDValue Src3 = Op.getOperand(3); - SDValue Mask = Op.getOperand(4); - MVT VT = Op.getSimpleValueType(); - - // set PassThru element - SDValue PassThru; - if (IntrData->Type == VPERM_3OP_MASKZ) - PassThru = getZeroVector(VT, Subtarget, DAG, dl); - else - PassThru = DAG.getBitcast(VT, Src2); - - // Swap Src1 and Src2 in the node creation - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, - dl, Op.getValueType(), - Src2, Src1, Src3), - Mask, PassThru, Subtarget, DAG); - } - case VPERMI_3OP_MASK:{ - // Src2 is the PassThru - SDValue Src1 = Op.getOperand(1); - SDValue Src2 = Op.getOperand(2); - SDValue Src3 = Op.getOperand(3); - SDValue Mask = Op.getOperand(4); - MVT VT = Op.getSimpleValueType(); - - // set PassThru element - SDValue PassThru = DAG.getBitcast(VT, Src2); - - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, - dl, Op.getValueType(), - Src1, Src2, Src3), - Mask, PassThru, Subtarget, DAG); - } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 3a79af9a1f5..7b5df0fd325 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -30,8 +30,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_IMM8_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3, - IFMA_OP, - VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, + IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, @@ -1061,79 +1060,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_128, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_256, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_512, VPERM_3OP_MASK, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0), X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0), X86_INTRINSIC_DATA(avx512_mask_vpshld_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0), @@ -1277,43 +1203,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_512, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_128, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_256, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_hi_512, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_128, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_256, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_pd_512, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_128, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_256, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_ps_512, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_128, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_256, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_128, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_256, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_512, VPERM_3OP_MASKZ, - X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0), X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0), X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0), |