| author | Craig Topper <craig.topper@intel.com> | 2018-07-12 00:29:56 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-12 00:29:56 +0000 |
| commit | 034adf26830fb9723138b276075068125ed208a7 (patch) | |
| tree | a8d47f9d5a1e41458061f39944f616dd71f1769f /llvm/lib | |
| parent | ba4a090a24b89bc7f77423d724c410be7e52c601 (diff) | |
| download | bcm5719-llvm-034adf26830fb9723138b276075068125ed208a7.tar.gz bcm5719-llvm-034adf26830fb9723138b276075068125ed208a7.zip | |
[X86] Remove and autoupgrade the scalar fma intrinsics with masking.
This converts them to the form clang now uses for codegen. Unfortunately, there still seem to be a few kinks to work out; I'll try to address them with follow-up patches.
llvm-svn: 336871
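For reference, the masked scalar FMA intrinsics are now expressed in plain IR instead of target-specific masked intrinsics: extract element 0 of each source, call the generic `llvm.fma` (or the `x86_avx512_vfmadd_f32`/`f64` variants when a non-default rounding mode is requested), select on bit 0 of the mask, and insert the result back into the pass-through vector. The sketch below restates only the simplest case (the plain "mask" form, pass-through = operand 0, default rounding) from the AutoUpgrade.cpp hunk further down; `upgradeMaskedScalarFMA` is a made-up helper name, not an LLVM API, and the two-argument `VectorType::get` matches the LLVM-7-era API this patch targets.

```cpp
// Simplified sketch of the upgraded form of
//   @llvm.x86.avx512.mask.vfmadd.ss(a, b, c, mask, 4)
// (current rounding mode, pass-through = element 0 of operand 0).
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *upgradeMaskedScalarFMA(IRBuilder<> &Builder, Value *A, Value *B,
                                     Value *C, Value *Mask) {
  // Operate on the scalar element only.
  Value *A0 = Builder.CreateExtractElement(A, (uint64_t)0);
  Value *B0 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *C0 = Builder.CreateExtractElement(C, (uint64_t)0);

  // Plain target-independent fma on the scalar type.
  Function *FMA = Intrinsic::getDeclaration(
      Builder.GetInsertBlock()->getModule(), Intrinsic::fma, A0->getType());
  Value *R = Builder.CreateCall(FMA, {A0, B0, C0});

  // Masking becomes an explicit select on bit 0 of the integer mask.
  Value *MaskVec = Builder.CreateBitCast(
      Mask, VectorType::get(Builder.getInt1Ty(),
                            Mask->getType()->getIntegerBitWidth()));
  Value *Bit0 = Builder.CreateExtractElement(MaskVec, (uint64_t)0);
  R = Builder.CreateSelect(Bit0, R, A0);

  // Put the (possibly pass-through) scalar back into the vector.
  return Builder.CreateInsertElement(A, R, (uint64_t)0);
}
```

The `maskz` and `mask3` forms differ only in the pass-through value (zero, or element 0 of operand 2) and in which operand the result is inserted back into, as the full upgrade code below shows.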
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 139 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 22 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFMA.td | 7 |
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 17 |
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 |
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 37 |
7 files changed, 129 insertions, 136 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 9c9e5570184..ef62a23b535 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -81,17 +81,17 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
       Name.startswith("fma.vfmsubadd.") || // Added in 7.0
       Name.startswith("fma.vfnmadd.") || // Added in 7.0
       Name.startswith("fma.vfnmsub.") || // Added in 7.0
-      Name.startswith("avx512.mask.vfmadd.p") || // Added in 7.0
-      Name.startswith("avx512.mask.vfnmadd.p") || // Added in 7.0
-      Name.startswith("avx512.mask.vfnmsub.p") || // Added in 7.0
-      Name.startswith("avx512.mask3.vfmadd.p") || // Added in 7.0
-      Name.startswith("avx512.maskz.vfmadd.p") || // Added in 7.0
-      Name.startswith("avx512.mask3.vfmsub.p") || // Added in 7.0
-      Name.startswith("avx512.mask3.vfnmsub.p") || // Added in 7.0
-      Name.startswith("avx512.mask.vfmaddsub.p") || // Added in 7.0
-      Name.startswith("avx512.maskz.vfmaddsub.p") || // Added in 7.0
-      Name.startswith("avx512.mask3.vfmaddsub.p") || // Added in 7.0
-      Name.startswith("avx512.mask3.vfmsubadd.p") || // Added in 7.0
+      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
+      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
+      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
+      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
+      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
+      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
+      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
+      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
+      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
+      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
+      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
       Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
       Name.startswith("avx512.kunpck") || //added in 6.0
@@ -826,7 +826,7 @@ static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
 
 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
                             Value *Op0, Value *Op1) {
-  // If the mask is all ones just emit the align operation.
+  // If the mask is all ones just emit the first operation.
   if (const auto *C = dyn_cast<Constant>(Mask))
     if (C->isAllOnesValue())
       return Op0;
@@ -835,6 +835,21 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
   return Builder.CreateSelect(Mask, Op0, Op1);
 }
 
+static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
+                                  Value *Op0, Value *Op1) {
+  // If the mask is all ones just emit the first operation.
+  if (const auto *C = dyn_cast<Constant>(Mask))
+    if (C->isAllOnesValue())
+      return Op0;
+
+  llvm::VectorType *MaskTy =
+    llvm::VectorType::get(Builder.getInt1Ty(),
+                          Mask->getType()->getIntegerBitWidth());
+  Mask = Builder.CreateBitCast(Mask, MaskTy);
+  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
+  return Builder.CreateSelect(Mask, Op0, Op1);
+}
+
 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
@@ -2806,6 +2821,64 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                         Rep, (uint64_t)0);
+    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
+                         Name.startswith("avx512.maskz.vfmadd.s") ||
+                         Name.startswith("avx512.mask3.vfmadd.s") ||
+                         Name.startswith("avx512.mask3.vfmsub.s") ||
+                         Name.startswith("avx512.mask3.vfnmsub.s"))) {
+      bool IsMask3 = Name[11] == '3';
+      bool IsMaskZ = Name[11] == 'z';
+      // Drop the "avx512.mask." to make it easier.
+      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
+      bool NegMul = Name[2] == 'n';
+      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
+
+      Value *A = CI->getArgOperand(0);
+      Value *B = CI->getArgOperand(1);
+      Value *C = CI->getArgOperand(2);
+
+      if (NegMul && (IsMask3 || IsMaskZ))
+        A = Builder.CreateFNeg(A);
+      if (NegMul && !(IsMask3 || IsMaskZ))
+        B = Builder.CreateFNeg(B);
+      if (NegAcc)
+        C = Builder.CreateFNeg(C);
+
+      A = Builder.CreateExtractElement(A, (uint64_t)0);
+      B = Builder.CreateExtractElement(B, (uint64_t)0);
+      C = Builder.CreateExtractElement(C, (uint64_t)0);
+
+      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
+          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
+        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
+
+        Intrinsic::ID IID;
+        if (Name.back() == 'd')
+          IID = Intrinsic::x86_avx512_vfmadd_f64;
+        else
+          IID = Intrinsic::x86_avx512_vfmadd_f32;
+        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
+        Rep = Builder.CreateCall(FMA, Ops);
+      } else {
+        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
+                                                  Intrinsic::fma,
+                                                  A->getType());
+        Rep = Builder.CreateCall(FMA, { A, B, C });
+      }
+
+      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
+                        IsMask3 ? C : A;
+
+      // For Mask3 with NegAcc, we need to create a new extractelement that
+      // avoids the negation above.
+      if (NegAcc && IsMask3)
+        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
+                                                (uint64_t)0);
+
+      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
+                                Rep, PassThru);
+      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
+                                        Rep, (uint64_t)0);
     } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                          Name.startswith("avx512.mask.vfnmadd.p") ||
                          Name.startswith("avx512.mask.vfnmsub.p") ||
@@ -2820,6 +2893,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       bool NegMul = Name[2] == 'n';
       bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
 
+      Value *A = CI->getArgOperand(0);
+      Value *B = CI->getArgOperand(1);
+      Value *C = CI->getArgOperand(2);
+
+      if (NegMul && (IsMask3 || IsMaskZ))
+        A = Builder.CreateFNeg(A);
+      if (NegMul && !(IsMask3 || IsMaskZ))
+        B = Builder.CreateFNeg(B);
+      if (NegAcc)
+        C = Builder.CreateFNeg(C);
+
       if (CI->getNumArgOperands() == 5 &&
           (!isa<ConstantInt>(CI->getArgOperand(4)) ||
            cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
@@ -2830,38 +2914,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         else
           IID = Intrinsic::x86_avx512_vfmadd_pd_512;
 
-        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
-                         CI->getArgOperand(2), CI->getArgOperand(4) };
-
-        if (NegMul) {
-          if (IsMaskZ || IsMask3)
-            Ops[0] = Builder.CreateFNeg(Ops[0]);
-          else
-            Ops[1] = Builder.CreateFNeg(Ops[1]);
-        }
-        if (NegAcc)
-          Ops[2] = Builder.CreateFNeg(Ops[2]);
-
         Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
-                                 Ops);
+                                 { A, B, C, CI->getArgOperand(4) });
       } else {
-
-        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
-                         CI->getArgOperand(2) };
-
-        if (NegMul) {
-          if (IsMaskZ || IsMask3)
-            Ops[0] = Builder.CreateFNeg(Ops[0]);
-          else
-            Ops[1] = Builder.CreateFNeg(Ops[1]);
-        }
-        if (NegAcc)
-          Ops[2] = Builder.CreateFNeg(Ops[2]);
-
         Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
-                                                  Ops[0]->getType());
-        Rep = Builder.CreateCall(FMA, Ops);
+                                                  A->getType());
+        Rep = Builder.CreateCall(FMA, { A, B, C });
       }
 
       Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fb923436959..50c616d382e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20710,39 +20710,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               Src1, Src2, Src3),
                                   Mask, PassThru, Subtarget, DAG);
     }
-    case FMA_OP_SCALAR_MASK:
-    case FMA_OP_SCALAR_MASK3:
-    case FMA_OP_SCALAR_MASKZ: {
-      SDValue Src1 = Op.getOperand(1);
-      SDValue Src2 = Op.getOperand(2);
-      SDValue Src3 = Op.getOperand(3);
-      SDValue Mask = Op.getOperand(4);
-      MVT VT = Op.getSimpleValueType();
-      SDValue PassThru = SDValue();
-
-      // set PassThru element
-      if (IntrData->Type == FMA_OP_SCALAR_MASKZ)
-        PassThru = getZeroVector(VT, Subtarget, DAG, dl);
-      else if (IntrData->Type == FMA_OP_SCALAR_MASK3)
-        PassThru = Src3;
-      else
-        PassThru = Src1;
-
-      unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
-      if (IntrWithRoundingModeOpcode != 0) {
-        SDValue Rnd = Op.getOperand(5);
-        if (!isRoundModeCurDirection(Rnd))
-          return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl,
-                                                  Op.getValueType(), Src1, Src2,
-                                                  Src3, Rnd),
-                                      Mask, PassThru, Subtarget, DAG);
-      }
-
-      return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl,
-                                              Op.getValueType(), Src1, Src2,
-                                              Src3),
-                                  Mask, PassThru, Subtarget, DAG);
-    }
     case IFMA_OP:
       // NOTE: We need to swizzle the operands to pass the multiply operands
       // first.
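The flag decoding in the AutoUpgrade hunks above indexes directly into the intrinsic name, which is easy to misread. Below is a hypothetical standalone restatement with a worked example; `decodeScalarFMAFlags` is not an LLVM function, and the indices assume the leading `llvm.x86.` prefix has already been stripped from the name, as it has by this point in `UpgradeIntrinsicCall`.

```cpp
// Hypothetical re-statement of the flag decoding used above, e.g. for
// "avx512.mask3.vfnmsub.sd": Name[11] distinguishes mask/mask3/maskz,
// then the "vf[n]m{add,sub}" body encodes which operands get negated.
#include "llvm/ADT/StringRef.h"
#include <cassert>

struct ScalarFMAFlags {
  bool IsMask3;  // pass-through comes from operand 2
  bool IsMaskZ;  // pass-through is zero
  bool NegMul;   // negate one multiplicand (vfnmadd/vfnmsub)
  bool NegAcc;   // negate the addend (vfmsub/vfnmsub)
};

static ScalarFMAFlags decodeScalarFMAFlags(llvm::StringRef Name) {
  ScalarFMAFlags F;
  F.IsMask3 = Name[11] == '3';                            // "avx512.mask3..."
  F.IsMaskZ = Name[11] == 'z';                            // "avx512.maskz..."
  Name = Name.drop_front(F.IsMask3 || F.IsMaskZ ? 13 : 12);
  F.NegMul = Name[2] == 'n';                              // "vfn..."
  F.NegAcc = F.NegMul ? Name[4] == 's' : Name[3] == 's';  // "...sub"
  return F;
}

int main() {
  ScalarFMAFlags F = decodeScalarFMAFlags("avx512.mask3.vfnmsub.sd");
  assert(F.IsMask3 && !F.IsMaskZ && F.NegMul && F.NegAcc);
  (void)F;
  return 0;
}
```

The packed (`.p*`) branch that follows reuses the same decoding; only the pass-through selection and the 512-bit rounding-mode intrinsics differ.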
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 98390cea49d..57899034bd6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6826,6 +6826,13 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                 (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+               (Op _.FRC:$src2, _.FRC:$src3,
+                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
+              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
+               VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
+
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
@@ -6841,6 +6848,13 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                addr:$src3)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+               (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
+                   (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
+              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
+               VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               addr:$src3)>;
+
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (Op _.FRC:$src2,
                     (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
@@ -6948,6 +6962,14 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
 
     def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+               (RndOp _.FRC:$src2, _.FRC:$src3,
+                      (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+                      (i32 imm:$rc)))))),
+              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
+               VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+               (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
+
+    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (X86selects VK1WM:$mask,
                 (RndOp _.FRC:$src2,
                        (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 376f643050f..e6fdac6832b 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -355,6 +355,13 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
               (!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
                VR128:$src1, (COPY_TO_REGCLASS RC:$src2, VR128),
                addr:$src3)>;
+
+    def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
+               (Op RC:$src2, (mem_frag addr:$src3),
+                   (EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
+              (!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
+               VR128:$src1, (COPY_TO_REGCLASS RC:$src2, VR128),
+               addr:$src3)>;
   }
 }
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0413fc9dfba..2dd60a1b8b5 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -28,8 +28,7 @@ enum IntrinsicType : uint16_t {
   INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
   INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK,
-  FMA_OP_MASK, FMA_OP_MASKZ,
-  FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
+  FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_SCALAR,
   IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
   COMPRESS_EXPAND_IN_REG,
@@ -879,9 +878,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK, X86ISD::CVTPS2PH,
                      0),
-  X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
-  X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
-
   X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
@@ -908,14 +904,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
                      X86ISD::VPSHUFBITQMB, 0),
-  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
-  X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
-
-  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
-  X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
-
-  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
-  X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
                      X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
@@ -933,9 +921,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
                      X86ISD::VFIXUPIMMS, 0),
-  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
-  X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
-
   X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 29ae67af7b7..cdf5746bb97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2535,16 +2535,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_avx512_mask_min_ss_round:
   case Intrinsic::x86_avx512_mask_max_sd_round:
   case Intrinsic::x86_avx512_mask_min_sd_round:
-  case Intrinsic::x86_avx512_mask_vfmadd_ss:
-  case Intrinsic::x86_avx512_mask_vfmadd_sd:
-  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
-  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
-  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
-  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
-  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
-  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
-  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
-  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
   case Intrinsic::x86_sse_cmp_ss:
   case Intrinsic::x86_sse_min_ss:
   case Intrinsic::x86_sse_max_ss:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 97d24019eb6..425f5ce384b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1497,10 +1497,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
   case Intrinsic::x86_avx512_mask_sub_sd_round:
   case Intrinsic::x86_avx512_mask_max_sd_round:
   case Intrinsic::x86_avx512_mask_min_sd_round:
-  case Intrinsic::x86_avx512_mask_vfmadd_ss:
-  case Intrinsic::x86_avx512_mask_vfmadd_sd:
-  case Intrinsic::x86_avx512_maskz_vfmadd_ss:
-  case Intrinsic::x86_avx512_maskz_vfmadd_sd:
     TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                       UndefElts, Depth + 1);
     if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
@@ -1527,39 +1523,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 
     break;
 
-  case Intrinsic::x86_avx512_mask3_vfmadd_ss:
-  case Intrinsic::x86_avx512_mask3_vfmadd_sd:
-  case Intrinsic::x86_avx512_mask3_vfmsub_ss:
-  case Intrinsic::x86_avx512_mask3_vfmsub_sd:
-  case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
-  case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
-    // These intrinsics get the passthru bits from operand 2.
-    TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
-                                      UndefElts, Depth + 1);
-    if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
-
-    // If lowest element of a scalar op isn't used then use Arg2.
-    if (!DemandedElts[0]) {
-      Worklist.Add(II);
-      return II->getArgOperand(2);
-    }
-
-    // Only lower element is used for operand 0 and 1.
-    DemandedElts = 1;
-    TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
-                                      UndefElts2, Depth + 1);
-    if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
-    TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
-                                      UndefElts3, Depth + 1);
-    if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
-
-    // Lower element is undefined if all three lower elements are undefined.
-    // Consider things like undef&0. The result is known zero, not undef.
-    if (!UndefElts2[0] || !UndefElts3[0])
-      UndefElts.clearBit(0);
-
-    break;
-
   case Intrinsic::x86_sse2_packssdw_128:
   case Intrinsic::x86_sse2_packsswb_128:
   case Intrinsic::x86_sse2_packuswb_128:

