| author | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2018-04-19 12:13:30 +0000 |
|---|---|---|
| committer | Alexander Ivchenko <alexander.ivchenko@intel.com> | 2018-04-19 12:13:30 +0000 |
| commit | e8fed1546e99e87bcad937f67cff2b5defcbe09c (patch) | |
| tree | 74dc198633392aa19d83f43e7de4fa9bdb3d28cb /llvm/lib | |
| parent | 9a175bc1bc171a03854c2ed177fbbea7390745a8 (diff) | |
| download | bcm5719-llvm-e8fed1546e99e87bcad937f67cff2b5defcbe09c.tar.gz bcm5719-llvm-e8fed1546e99e87bcad937f67cff2b5defcbe09c.zip | |
Lowering x86 adds/addus/subs/subus intrinsics (llvm part)
This is the patch that lowers the x86 saturating add/subtract intrinsics to native IR
in order to enable optimizations. The patch also adds folding of the previously
missing saturation patterns, so that the generated IR is selected to the same
machine instructions that the intrinsics used to produce.
Patch by tkrupa
Differential Revision: https://reviews.llvm.org/D44785
llvm-svn: 330322
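The simplest upgrade path added here is the unsigned saturating subtract: instead of widening, UpgradeX86AddSubSatIntrinsics emits a umax-style compare/select followed by a subtraction. A hand-written sketch (the function and value names below are illustrative, not taken from the patch or its tests) of the IR that a call to @llvm.x86.sse2.psubus.b is now auto-upgraded to:

```llvm
; Roughly what the auto-upgrader produces for
;   %r = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a, <16 x i8> %b)
; i.e. sub(umax(a, b), b), which saturates at zero.
define <16 x i8> @psubus_upgraded(<16 x i8> %a, <16 x i8> %b) {
  %cmp = icmp ugt <16 x i8> %a, %b
  %umax = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
  %res = sub <16 x i8> %umax, %b
  ret <16 x i8> %res
}
```

The backend is then expected to fold this compare/select/sub shape back into PSUBUSB, so no code quality is lost relative to keeping the intrinsic opaque.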
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 106 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 89 |
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 40 |
3 files changed, 193 insertions, 42 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index f3193f5893c..d7d045ffa07 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -84,7 +84,19 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
   // like to use this information to remove upgrade code for some older
   // intrinsics. It is currently undecided how we will determine that future
   // point.
-  if (Name=="ssse3.pabs.b.128" || // Added in 6.0
+  if (Name.startswith("sse2.padds") || // Added in 7.0
+      Name.startswith("sse2.paddus") || // Added in 7.0
+      Name.startswith("sse2.psubs") || // Added in 7.0
+      Name.startswith("sse2.psubus") || // Added in 7.0
+      Name.startswith("avx2.padds") || // Added in 7.0
+      Name.startswith("avx2.paddus") || // Added in 7.0
+      Name.startswith("avx2.psubs") || // Added in 7.0
+      Name.startswith("avx2.psubus") || // Added in 7.0
+      Name.startswith("avx512.mask.padds") || // Added in 7.0
+      Name.startswith("avx512.mask.paddus") || // Added in 7.0
+      Name.startswith("avx512.mask.psubs") || // Added in 7.0
+      Name.startswith("avx512.mask.psubus") || // Added in 7.0
+      Name=="ssse3.pabs.b.128" || // Added in 6.0
       Name=="ssse3.pabs.w.128" || // Added in 6.0
       Name=="ssse3.pabs.d.128" || // Added in 6.0
       Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
@@ -845,6 +857,77 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
   return EmitX86Select(Builder, Mask, Align, Passthru);
 }

+static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
+                                            bool IsSigned, bool IsAddition) {
+  // Get elements.
+  Value *Op0 = CI.getArgOperand(0);
+  Value *Op1 = CI.getArgOperand(1);
+
+  // Extend elements.
+  Type *ResultType = CI.getType();
+  unsigned NumElts = ResultType->getVectorNumElements();
+
+  Value *Res;
+  if (!IsAddition && !IsSigned) {
+    Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);
+    Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);
+    Res = Builder.CreateSub(Select, Op1);
+  } else {
+    Type *EltType = ResultType->getVectorElementType();
+    Type *ExtEltType = EltType == Builder.getInt8Ty() ? Builder.getInt16Ty()
+                                                      : Builder.getInt32Ty();
+    Type *ExtVT = VectorType::get(ExtEltType, NumElts);
+    Op0 = IsSigned ? Builder.CreateSExt(Op0, ExtVT)
+                   : Builder.CreateZExt(Op0, ExtVT);
+    Op1 = IsSigned ? Builder.CreateSExt(Op1, ExtVT)
+                   : Builder.CreateZExt(Op1, ExtVT);
+
+    // Perform addition/substraction.
+    Res = IsAddition ? Builder.CreateAdd(Op0, Op1)
+                     : Builder.CreateSub(Op0, Op1);
+
+    // Create a vector of maximum values of not extended type
+    // (if overflow occurs, it will be saturated to that value).
+    unsigned EltSizeInBits = EltType->getPrimitiveSizeInBits();
+    APInt MaxInt = IsSigned ? APInt::getSignedMaxValue(EltSizeInBits)
+                            : APInt::getMaxValue(EltSizeInBits);
+    Value *MaxVec = ConstantInt::get(ResultType, MaxInt);
+    // Extend so that it can be compared to result of add/sub.
+    MaxVec = IsSigned ? Builder.CreateSExt(MaxVec, ExtVT)
+                      : Builder.CreateZExt(MaxVec, ExtVT);
+
+    // Saturate overflow.
+    ICmpInst::Predicate Pred = IsSigned ? ICmpInst::ICMP_SLE
+                                        : ICmpInst::ICMP_ULE;
+    Value *Cmp = Builder.CreateICmp(Pred, Res,
+                                    MaxVec); // 1 if no overflow.
+    Res = Builder.CreateSelect(Cmp, Res,
+                               MaxVec); // If overflowed, copy from max vec.
+
+    // Saturate underflow.
+    if (IsSigned) {
+      APInt MinInt = APInt::getSignedMinValue(EltSizeInBits);
+      Value *MinVec = ConstantInt::get(ResultType, MinInt);
+      // Extend so that it can be compared to result of add/sub.
+      MinVec = Builder.CreateSExt(MinVec, ExtVT);
+      Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Res,
+                                      MinVec); // 1 if no underflow.
+      Res = Builder.CreateSelect(Cmp, Res,
+                                 MinVec); // If underflowed, copy from min vec.
+    }
+
+    // Truncate to original type.
+    Res = Builder.CreateTrunc(Res, ResultType);
+  }
+
+  if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+    Value *VecSRC = CI.getArgOperand(2);
+    Value *Mask = CI.getArgOperand(3);
+    Res = EmitX86Select(Builder, Mask, Res, VecSRC);
+  }
+  return Res;
+}
+
 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
                                  Value *Ptr, Value *Data, Value *Mask,
                                  bool Aligned) {
@@ -1684,6 +1767,26 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                                             ShuffleMask);
     Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                         CI->getArgOperand(1));
+  } else if (IsX86 && (Name.startswith("sse2.padds") ||
+                       Name.startswith("avx2.padds") ||
+                       Name.startswith("avx512.mask.padds"))) {
+    Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+                                        true, true); // Signed add.
+  } else if (IsX86 && (Name.startswith("sse2.paddus") ||
+                       Name.startswith("avx2.paddus") ||
+                       Name.startswith("avx512.mask.paddus"))) {
+    Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+                                        false, true); // Unsigned add.
+  } else if (IsX86 && (Name.startswith("sse2.psubs") ||
+                       Name.startswith("avx2.psubs") ||
+                       Name.startswith("avx512.mask.psubs"))) {
+    Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+                                        true, false); // Signed sub.
+  } else if (IsX86 && (Name.startswith("sse2.psubus") ||
+                       Name.startswith("avx2.psubus") ||
+                       Name.startswith("avx512.mask.psubus"))) {
+    Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI,
+                                        false, false); // Unsigned sub.
   } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                        Name.startswith("avx2.vbroadcast") ||
                        Name.startswith("avx512.pbroadcast") ||
@@ -1694,7 +1797,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
     Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                       Constant::getNullValue(MaskTy));
-
     if (CI->getNumArgOperands() == 3)
       Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                           CI->getArgOperand(1));
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f3f1b86c11..f4bf270d738 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35997,6 +35997,91 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
   return SDValue();
 }

+/// This function detects the addition or substraction with saturation pattern
+/// between 2 unsigned i8/i16 vectors and replace this operation with the
+/// efficient X86ISD::ADDUS/X86ISD::ADDS/X86ISD::SUBUS/x86ISD::SUBS instruction.
+static SDValue detectAddSubSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget,
+                                      const SDLoc &DL) {
+  if (!VT.isVector() || !VT.isSimple())
+    return SDValue();
+  EVT InVT = In.getValueType();
+  unsigned NumElems = VT.getVectorNumElements();
+
+  EVT ScalarVT = VT.getVectorElementType();
+  if ((ScalarVT != MVT::i8 && ScalarVT != MVT::i16) ||
+      InVT.getSizeInBits() % 128 != 0 || !isPowerOf2_32(NumElems))
+    return SDValue();
+
+  // InScalarVT is the intermediate type in AddSubSat pattern
+  // and it should be greater than the original input type (i8/i16).
+  EVT InScalarVT = InVT.getVectorElementType();
+  if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
+    return SDValue();
+
+  if (!Subtarget.hasSSE2())
+    return SDValue();
+
+  // Detect the following pattern:
+  // %2 = zext <16 x i8> %0 to <16 x i16>
+  // %3 = zext <16 x i8> %1 to <16 x i16>
+  // %4 = add nuw nsw <16 x i16> %3, %2
+  // %5 = icmp ult <16 x i16> %4, <16 x i16> (vector of max InScalarVT values)
+  // %6 = select <16 x i1> %5, <16 x i16> (vector of max InScalarVT values)
+  // %7 = trunc <16 x i16> %6 to <16 x i8>
+
+  // Detect a Sat Pattern
+  bool Signed = true;
+  SDValue Sat = detectSSatPattern(In, VT, false);
+  if (!Sat) {
+    Sat = detectUSatPattern(In, VT);
+    Signed = false;
+  }
+  if (!Sat)
+    return SDValue();
+  if (Sat.getOpcode() != ISD::ADD && Sat.getOpcode() != ISD::SUB)
+    return SDValue();
+
+  unsigned Opcode = Sat.getOpcode() == ISD::ADD ? Signed ? X86ISD::ADDS
+                                                         : X86ISD::ADDUS
+                                                : Signed ? X86ISD::SUBS
+                                                         : X86ISD::SUBUS;
+
+  // Get addition elements.
+  SDValue LHS = Sat.getOperand(0);
+  SDValue RHS = Sat.getOperand(1);
+
+  // Check if LHS and RHS are results of type promotion or
+  // one of them is and the other one is constant.
+  unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND :
+                                   ISD::ZERO_EXTEND;
+  unsigned LHSOpcode = LHS.getOpcode();
+  unsigned RHSOpcode = RHS.getOpcode();
+
+  if (LHSOpcode == ExtendOpcode && RHSOpcode == ExtendOpcode) {
+    LHS = LHS.getOperand(0);
+    RHS = RHS.getOperand(0);
+  } else if (LHSOpcode == ExtendOpcode &&
+             ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) {
+    LHS = LHS.getOperand(0);
+    RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);
+  } else if (RHSOpcode == ExtendOpcode &&
+             ISD::isBuildVectorOfConstantSDNodes(LHS.getNode())) {
+    RHS = RHS.getOperand(0);
+    LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS);
+  } else
+    return SDValue();
+
+  // The pattern is detected, emit ADDS/ADDUS/SUBS/SUBUS instruction.
+  auto AddSubSatBuilder = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
+                                   ArrayRef<SDValue> Ops) {
+    EVT VT = Ops[0].getValueType();
+    return DAG.getNode(Opcode, DL, VT, Ops);
+  };
+  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { LHS, RHS },
+                          AddSubSatBuilder);
+}
+
 static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   EVT VT = N->getValueType(0);
@@ -36011,6 +36096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
   if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
     return Avg;

+  // Try to detect addition or substraction with saturation.
+  if (SDValue AddSubSat = detectAddSubSatPattern(Src, VT, DAG, Subtarget, DL))
+    return AddSubSat;
+
   // Try to combine truncation with signed/unsigned saturation.
   if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
     return Val;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index ad95d4e9aff..66f0a675cf1 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -402,10 +402,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
   X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
-  X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(avx2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
@@ -444,10 +440,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
   X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
@@ -803,18 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
                      X86ISD::FMULS_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FMULS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_padds_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_padds_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_padds_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_paddus_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_paddus_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_paddus_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_permvar_df_256, VPERM_2OP_MASK,
                      X86ISD::VPERMV, 0),
   X86_INTRINSIC_DATA(avx512_mask_permvar_df_512, VPERM_2OP_MASK,
@@ -981,18 +961,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
   X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
   X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubus_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubus_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubus_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
                      X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
@@ -1602,10 +1570,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
-  X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
-  X86_INTRINSIC_DATA(sse2_paddus_b, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
-  X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
   X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
   X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
   X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1627,10 +1591,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
-  X86_INTRINSIC_DATA(sse2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(sse2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(sse2_sqrt_pd, INTR_TYPE_1OP, ISD::FSQRT, 0),
   X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
   X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
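For the widening cases, the upgraded IR has the extend/clamp/truncate shape sketched in the detectAddSubSatPattern comment above. As a concrete, hand-written example (the function and value names are illustrative, not taken from the patch or its tests), an unsigned saturating word add such as @llvm.x86.sse2.paddus.w becomes roughly:

```llvm
; zext to i32, add, clamp to the unsigned i16 maximum (65535), truncate back.
; combineTruncate can then hand this truncate to detectAddSubSatPattern,
; which is intended to rebuild it as X86ISD::ADDUS (paddusw/vpaddusw).
define <8 x i16> @paddusw_upgraded(<8 x i16> %a, <8 x i16> %b) {
  %wa = zext <8 x i16> %a to <8 x i32>
  %wb = zext <8 x i16> %b to <8 x i32>
  %sum = add <8 x i32> %wa, %wb
  %inbounds = icmp ule <8 x i32> %sum, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamped = select <8 x i1> %inbounds, <8 x i32> %sum, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %res = trunc <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %res
}
```

For the masked AVX-512 variants, the same pattern is simply wrapped in an extra select on the mask operand (EmitX86Select in the AutoUpgrade hunk), so the pass-through source is preserved for disabled lanes.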

