summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2018-07-12 00:29:56 +0000
committer Craig Topper <craig.topper@intel.com> 2018-07-12 00:29:56 +0000
commit 034adf26830fb9723138b276075068125ed208a7 (patch)
tree a8d47f9d5a1e41458061f39944f616dd71f1769f /llvm/lib
parent ba4a090a24b89bc7f77423d724c410be7e52c601 (diff)
download bcm5719-llvm-034adf26830fb9723138b276075068125ed208a7.tar.gz
download bcm5719-llvm-034adf26830fb9723138b276075068125ed208a7.zip
[X86] Remove and autoupgrade the scalar fma intrinsics with masking.
This converts them to what clang is now using for codegen. Unfortunately, there seem to be a few kinks to work out still. I'll try to address with follow up patches. llvm-svn: 336871
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp139
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp33
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td22
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA.td7
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h17
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp10
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp37
7 files changed, 129 insertions, 136 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 9c9e5570184..ef62a23b535 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -81,17 +81,17 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
Name.startswith("fma.vfnmadd.") || // Added in 7.0
Name.startswith("fma.vfnmsub.") || // Added in 7.0
- Name.startswith("avx512.mask.vfmadd.p") || // Added in 7.0
- Name.startswith("avx512.mask.vfnmadd.p") || // Added in 7.0
- Name.startswith("avx512.mask.vfnmsub.p") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmadd.p") || // Added in 7.0
- Name.startswith("avx512.maskz.vfmadd.p") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmsub.p") || // Added in 7.0
- Name.startswith("avx512.mask3.vfnmsub.p") || // Added in 7.0
- Name.startswith("avx512.mask.vfmaddsub.p") || // Added in 7.0
- Name.startswith("avx512.maskz.vfmaddsub.p") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmaddsub.p") || // Added in 7.0
- Name.startswith("avx512.mask3.vfmsubadd.p") || // Added in 7.0
+ Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
+ Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
+ Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
+ Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
+ Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
+ Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
+ Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
+ Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
+ Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
Name.startswith("avx512.kunpck") || //added in 6.0
@@ -826,7 +826,7 @@ static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
- // If the mask is all ones just emit the align operation.
+ // If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
@@ -835,6 +835,21 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
return Builder.CreateSelect(Mask, Op0, Op1);
}
+static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
+ Value *Op0, Value *Op1) {
+ // If the mask is all ones just emit the first operation.
+ if (const auto *C = dyn_cast<Constant>(Mask))
+ if (C->isAllOnesValue())
+ return Op0;
+
+ llvm::VectorType *MaskTy =
+ llvm::VectorType::get(Builder.getInt1Ty(),
+ Mask->getType()->getIntegerBitWidth());
+ Mask = Builder.CreateBitCast(Mask, MaskTy);
+ Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
+ return Builder.CreateSelect(Mask, Op0, Op1);
+}
+
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
@@ -2806,6 +2821,64 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
Rep, (uint64_t)0);
+ } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
+ Name.startswith("avx512.maskz.vfmadd.s") ||
+ Name.startswith("avx512.mask3.vfmadd.s") ||
+ Name.startswith("avx512.mask3.vfmsub.s") ||
+ Name.startswith("avx512.mask3.vfnmsub.s"))) {
+ bool IsMask3 = Name[11] == '3';
+ bool IsMaskZ = Name[11] == 'z';
+ // Drop the "avx512.mask." to make it easier.
+ Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
+ bool NegMul = Name[2] == 'n';
+ bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
+
+ Value *A = CI->getArgOperand(0);
+ Value *B = CI->getArgOperand(1);
+ Value *C = CI->getArgOperand(2);
+
+ if (NegMul && (IsMask3 || IsMaskZ))
+ A = Builder.CreateFNeg(A);
+ if (NegMul && !(IsMask3 || IsMaskZ))
+ B = Builder.CreateFNeg(B);
+ if (NegAcc)
+ C = Builder.CreateFNeg(C);
+
+ A = Builder.CreateExtractElement(A, (uint64_t)0);
+ B = Builder.CreateExtractElement(B, (uint64_t)0);
+ C = Builder.CreateExtractElement(C, (uint64_t)0);
+
+ if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
+ cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
+ Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
+
+ Intrinsic::ID IID;
+ if (Name.back() == 'd')
+ IID = Intrinsic::x86_avx512_vfmadd_f64;
+ else
+ IID = Intrinsic::x86_avx512_vfmadd_f32;
+ Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
+ Rep = Builder.CreateCall(FMA, Ops);
+ } else {
+ Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
+ Intrinsic::fma,
+ A->getType());
+ Rep = Builder.CreateCall(FMA, { A, B, C });
+ }
+
+ Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
+ IsMask3 ? C : A;
+
+ // For Mask3 with NegAcc, we need to create a new extractelement that
+ // avoids the negation above.
+ if (NegAcc && IsMask3)
+ PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
+ (uint64_t)0);
+
+ Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
+ Rep, PassThru);
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
+ Rep, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
Name.startswith("avx512.mask.vfnmadd.p") ||
Name.startswith("avx512.mask.vfnmsub.p") ||
@@ -2820,6 +2893,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool NegMul = Name[2] == 'n';
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
+ Value *A = CI->getArgOperand(0);
+ Value *B = CI->getArgOperand(1);
+ Value *C = CI->getArgOperand(2);
+
+ if (NegMul && (IsMask3 || IsMaskZ))
+ A = Builder.CreateFNeg(A);
+ if (NegMul && !(IsMask3 || IsMaskZ))
+ B = Builder.CreateFNeg(B);
+ if (NegAcc)
+ C = Builder.CreateFNeg(C);
+
if (CI->getNumArgOperands() == 5 &&
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
@@ -2830,38 +2914,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
else
IID = Intrinsic::x86_avx512_vfmadd_pd_512;
- Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(4) };
-
- if (NegMul) {
- if (IsMaskZ || IsMask3)
- Ops[0] = Builder.CreateFNeg(Ops[0]);
- else
- Ops[1] = Builder.CreateFNeg(Ops[1]);
- }
- if (NegAcc)
- Ops[2] = Builder.CreateFNeg(Ops[2]);
-
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
- Ops);
+ { A, B, C, CI->getArgOperand(4) });
} else {
-
- Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2) };
-
- if (NegMul) {
- if (IsMaskZ || IsMask3)
- Ops[0] = Builder.CreateFNeg(Ops[0]);
- else
- Ops[1] = Builder.CreateFNeg(Ops[1]);
- }
- if (NegAcc)
- Ops[2] = Builder.CreateFNeg(Ops[2]);
-
Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
- Ops[0]->getType());
- Rep = Builder.CreateCall(FMA, Ops);
+ A->getType());
+ Rep = Builder.CreateCall(FMA, { A, B, C });
}
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fb923436959..50c616d382e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20710,39 +20710,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case FMA_OP_SCALAR_MASK:
- case FMA_OP_SCALAR_MASK3:
- case FMA_OP_SCALAR_MASKZ: {
- SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
- SDValue Src3 = Op.getOperand(3);
- SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
- SDValue PassThru = SDValue();
-
- // set PassThru element
- if (IntrData->Type == FMA_OP_SCALAR_MASKZ)
- PassThru = getZeroVector(VT, Subtarget, DAG, dl);
- else if (IntrData->Type == FMA_OP_SCALAR_MASK3)
- PassThru = Src3;
- else
- PassThru = Src1;
-
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- SDValue Rnd = Op.getOperand(5);
- if (!isRoundModeCurDirection(Rnd))
- return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl,
- Op.getValueType(), Src1, Src2,
- Src3, Rnd),
- Mask, PassThru, Subtarget, DAG);
- }
-
- return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl,
- Op.getValueType(), Src1, Src2,
- Src3),
- Mask, PassThru, Subtarget, DAG);
- }
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 98390cea49d..57899034bd6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6826,6 +6826,13 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+ (Op _.FRC:$src2, _.FRC:$src3,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
+ (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
+ VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+ (COPY_TO_REGCLASS _.FRC:$src3, VR128X))>;
+
+ def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
(_.ScalarLdFrag addr:$src3)))))),
@@ -6841,6 +6848,13 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
addr:$src3)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+ (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
+ (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
+ VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+ addr:$src3)>;
+
+ def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects VK1WM:$mask,
(Op _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
@@ -6948,6 +6962,14 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
(COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
+ (RndOp _.FRC:$src2, _.FRC:$src3,
+ (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
+ (i32 imm:$rc)))))),
+ (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
+ VR128X:$src1, (COPY_TO_REGCLASS _.FRC:$src2, VR128X),
+ (COPY_TO_REGCLASS _.FRC:$src3, VR128X), imm:$rc)>;
+
+ def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(X86selects VK1WM:$mask,
(RndOp _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 376f643050f..e6fdac6832b 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -355,6 +355,13 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
(!cast<Instruction>(Prefix#"132"#Suffix#"m_Int")
VR128:$src1, (COPY_TO_REGCLASS RC:$src2, VR128),
addr:$src3)>;
+
+ def : Pat<(VT (Move (VT VR128:$src1), (VT (scalar_to_vector
+ (Op RC:$src2, (mem_frag addr:$src3),
+ (EltVT (extractelt (VT VR128:$src1), (iPTR 0)))))))),
+ (!cast<Instruction>(Prefix#"231"#Suffix#"m_Int")
+ VR128:$src1, (COPY_TO_REGCLASS RC:$src2, VR128),
+ addr:$src3)>;
}
}
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0413fc9dfba..2dd60a1b8b5 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -28,8 +28,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK,
- FMA_OP_MASK, FMA_OP_MASKZ,
- FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
+ FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_SCALAR,
IFMA_OP, VPERM_2OP, INTR_TYPE_SCALAR_MASK,
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
COMPRESS_EXPAND_IN_REG,
@@ -879,9 +878,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
X86ISD::CVTPS2PH, 0),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
-
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
@@ -908,14 +904,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
X86ISD::VPSHUFBITQMB, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3, X86ISD::FMADDS3_RND),
-
- X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3, X86ISD::FMSUBS3_RND),
-
- X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3, X86ISD::FNMSUBS3_RND),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
@@ -933,9 +921,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_ss, FIXUPIMMS_MASKZ,
X86ISD::VFIXUPIMMS, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1, X86ISD::FMADDS1_RND),
-
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 29ae67af7b7..cdf5746bb97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2535,16 +2535,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx512_mask_min_ss_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
- case Intrinsic::x86_avx512_mask_vfmadd_ss:
- case Intrinsic::x86_avx512_mask_vfmadd_sd:
- case Intrinsic::x86_avx512_maskz_vfmadd_ss:
- case Intrinsic::x86_avx512_maskz_vfmadd_sd:
- case Intrinsic::x86_avx512_mask3_vfmadd_ss:
- case Intrinsic::x86_avx512_mask3_vfmadd_sd:
- case Intrinsic::x86_avx512_mask3_vfmsub_ss:
- case Intrinsic::x86_avx512_mask3_vfmsub_sd:
- case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
- case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 97d24019eb6..425f5ce384b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1497,10 +1497,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
- case Intrinsic::x86_avx512_mask_vfmadd_ss:
- case Intrinsic::x86_avx512_mask_vfmadd_sd:
- case Intrinsic::x86_avx512_maskz_vfmadd_ss:
- case Intrinsic::x86_avx512_maskz_vfmadd_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
@@ -1527,39 +1523,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
- case Intrinsic::x86_avx512_mask3_vfmadd_ss:
- case Intrinsic::x86_avx512_mask3_vfmadd_sd:
- case Intrinsic::x86_avx512_mask3_vfmsub_ss:
- case Intrinsic::x86_avx512_mask3_vfmsub_sd:
- case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
- case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
- // These intrinsics get the passthru bits from operand 2.
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
- UndefElts, Depth + 1);
- if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
-
- // If lowest element of a scalar op isn't used then use Arg2.
- if (!DemandedElts[0]) {
- Worklist.Add(II);
- return II->getArgOperand(2);
- }
-
- // Only lower element is used for operand 0 and 1.
- DemandedElts = 1;
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts3, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
-
- // Lower element is undefined if all three lower elements are undefined.
- // Consider things like undef&0. The result is known zero, not undef.
- if (!UndefElts2[0] || !UndefElts3[0])
- UndefElts.clearBit(0);
-
- break;
-
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_sse2_packuswb_128:
OpenPOWER on IntegriCloud