diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 30 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 32 |
3 files changed, 41 insertions, 44 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index ba2f3fa9248..bd4638f147a 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -265,6 +265,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0 + Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0 + Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0 Name == "sse.cvtsi2ss" || // Added in 7.0 Name == "sse.cvtsi642ss" || // Added in 7.0 Name == "sse2.cvtsi2sd" || // Added in 7.0 @@ -2569,6 +2571,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) : CI->getArgOperand(0); Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); + } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") || + Name.startswith("avx512.maskz.vpmadd52"))) { + bool ZeroMask = Name[11] == 'z'; + bool High = Name[20] == 'h' || Name[21] == 'h'; + unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); + Intrinsic::ID IID; + if (VecWidth == 128 && !High) + IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; + else if (VecWidth == 256 && !High) + IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; + else if (VecWidth == 512 && !High) + IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; + else if (VecWidth == 128 && High) + IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; + else if (VecWidth == 256 && High) + IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; + else if (VecWidth == 512 && High) + IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; + else + llvm_unreachable("Unexpected intrinsic"); + + Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), + CI->getArgOperand(2) }; + Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), + Args); + Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) + : CI->getArgOperand(0); + Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); } else if (IsX86 && Name.startswith("avx512.mask.") && upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { // Rep will be updated by the call in the condition. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 218ba047a2e..5b699b18dd9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20624,26 +20624,11 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src3), Mask, PassThru, Subtarget, DAG); } - case IFMA_OP_MASKZ: - case IFMA_OP_MASK: { - SDValue Src1 = Op.getOperand(1); - SDValue Src2 = Op.getOperand(2); - SDValue Src3 = Op.getOperand(3); - SDValue Mask = Op.getOperand(4); - MVT VT = Op.getSimpleValueType(); - SDValue PassThru = Src1; - - // set PassThru element - if (IntrData->Type == IFMA_OP_MASKZ) - PassThru = getZeroVector(VT, Subtarget, DAG, dl); - - // Node we need to swizzle the operands to pass the multiply operands + case IFMA_OP: + // NOTE: We need to swizzle the operands to pass the multiply operands // first. - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, - dl, Op.getValueType(), - Src2, Src3, Src1), - Mask, PassThru, Subtarget, DAG); - } + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); case CVTPD2PS: // ISD::FP_ROUND has a second argument that indicates if the truncation // does not change the value. Set it to 0 since it can change. diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index dca513bf7f8..d5263767db1 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -30,7 +30,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_IMM8_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3, - IFMA_OP_MASK, IFMA_OP_MASKZ, + IFMA_OP, VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, @@ -1133,18 +1133,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_512, VPERM_3OP_MASK, X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpmadd52h_uq_128 , IFMA_OP_MASK, - X86ISD::VPMADD52H, 0), - X86_INTRINSIC_DATA(avx512_mask_vpmadd52h_uq_256 , IFMA_OP_MASK, - X86ISD::VPMADD52H, 0), - X86_INTRINSIC_DATA(avx512_mask_vpmadd52h_uq_512 , IFMA_OP_MASK, - X86ISD::VPMADD52H, 0), - X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_128 , IFMA_OP_MASK, - X86ISD::VPMADD52L, 0), - X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_256 , IFMA_OP_MASK, - X86ISD::VPMADD52L, 0), - X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , IFMA_OP_MASK, - X86ISD::VPMADD52L, 0), X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0), X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0), @@ -1325,18 +1313,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_512, VPERM_3OP_MASKZ, X86ISD::VPERMV3, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpmadd52h_uq_128, IFMA_OP_MASKZ, - X86ISD::VPMADD52H, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpmadd52h_uq_256, IFMA_OP_MASKZ, - X86ISD::VPMADD52H, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpmadd52h_uq_512, IFMA_OP_MASKZ, - X86ISD::VPMADD52H, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_128, IFMA_OP_MASKZ, - X86ISD::VPMADD52L, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_256, IFMA_OP_MASKZ, - X86ISD::VPMADD52L, 0), - X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, IFMA_OP_MASKZ, - X86ISD::VPMADD52L, 0), X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0), X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0), @@ -1461,6 +1437,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0), X86_INTRINSIC_DATA(avx512_vpermilvar_pd_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_vpermilvar_ps_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_vpmadd52h_uq_128 , IFMA_OP, X86ISD::VPMADD52H, 0), + X86_INTRINSIC_DATA(avx512_vpmadd52h_uq_256 , IFMA_OP, X86ISD::VPMADD52H, 0), + X86_INTRINSIC_DATA(avx512_vpmadd52h_uq_512 , IFMA_OP, X86ISD::VPMADD52H, 0), + X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0), + X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0), + X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0), |