1 files changed, 179 insertions, 53 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 703a3af1918..f434b7de51b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -410,6 +410,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
     setOperationAction(ISD::SELECT, VT, Custom);
     setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+    setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
   }
   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
     if (VT == MVT::i64 && !Subtarget.is64Bit())
@@ -929,6 +931,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::SETCC,              VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC,      VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCCS,     VT, Custom);
       setOperationAction(ISD::CTPOP,              VT, Custom);
       setOperationAction(ISD::ABS,                VT, Custom);
 
@@ -1212,6 +1216,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
       setOperationAction(ISD::SETCC,           VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC,   VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCCS,  VT, Custom);
       setOperationAction(ISD::CTPOP,           VT, Custom);
       setOperationAction(ISD::CTLZ,            VT, Custom);
 
@@ -1374,6 +1380,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::SUB,              VT, Custom);
       setOperationAction(ISD::MUL,              VT, Custom);
       setOperationAction(ISD::SETCC,            VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC,    VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCCS,   VT, Custom);
       setOperationAction(ISD::SELECT,           VT, Custom);
       setOperationAction(ISD::TRUNCATE,         VT, Custom);
       setOperationAction(ISD::UADDSAT,          VT, Custom);
@@ -1513,6 +1521,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::ROTL,             VT, Custom);
       setOperationAction(ISD::ROTR,             VT, Custom);
       setOperationAction(ISD::SETCC,            VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCC,    VT, Custom);
+      setOperationAction(ISD::STRICT_FSETCCS,   VT, Custom);
       setOperationAction(ISD::SELECT,           VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
@@ -20264,14 +20274,20 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
 /// equivalent.
 SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
-                                   const SDLoc &dl, SelectionDAG &DAG) const {
+                                   const SDLoc &dl, SelectionDAG &DAG,
+                                   SDValue Chain, bool IsSignaling) const {
   if (isNullConstant(Op1))
     return EmitTest(Op0, X86CC, dl, DAG, Subtarget);
 
   EVT CmpVT = Op0.getValueType();
 
-  if (CmpVT.isFloatingPoint())
+  if (CmpVT.isFloatingPoint()) {
+    if (Chain)
+      return DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS
+                                     : X86ISD::STRICT_FCMP,
+                         dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
     return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
+  }
 
   assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
           CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
@@ -20333,16 +20349,19 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
                                                  SelectionDAG &DAG) const {
   // If the subtarget does not support the FUCOMI instruction, floating-point
   // comparisons have to be converted.
-  if (Subtarget.hasCMov() ||
-      Cmp.getOpcode() != X86ISD::CMP ||
-      !Cmp.getOperand(0).getValueType().isFloatingPoint() ||
-      !Cmp.getOperand(1).getValueType().isFloatingPoint())
+  bool IsCmp = Cmp.getOpcode() == X86ISD::CMP;
+  bool IsStrictCmp = Cmp.getOpcode() == X86ISD::STRICT_FCMP ||
+                     Cmp.getOpcode() == X86ISD::STRICT_FCMPS;
+
+  if (Subtarget.hasCMov() || (!IsCmp && !IsStrictCmp) ||
+      !Cmp.getOperand(IsStrictCmp ? 1 : 0).getValueType().isFloatingPoint() ||
+      !Cmp.getOperand(IsStrictCmp ? 2 : 1).getValueType().isFloatingPoint())
     return Cmp;
 
   // The instruction selector will select an FUCOM instruction instead of
   // FUCOMI, which writes the comparison result to FPSW instead of EFLAGS. Hence
   // build an SDNode sequence that transfers the result from FPSW into EFLAGS:
-  // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86cmp ...)), 8))))
+  // (X86sahf (trunc (srl (X86fp_stsw (trunc (X86any_fcmp ...)), 8))))
   SDLoc dl(Cmp);
   SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp);
   SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW);
@@ -20586,7 +20605,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
 /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask
 /// CMPs.
 static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
-                                   SDValue &Op1) {
+                                   SDValue &Op1, bool &IsAlwaysSignaling) {
   unsigned SSECC;
   bool Swap = false;
 
@@ -20625,6 +20644,22 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
   if (Swap)
     std::swap(Op0, Op1);
 
+  switch (SetCCOpcode) {
+  default:
+    IsAlwaysSignaling = true;
+    break;
+  case ISD::SETEQ:
+  case ISD::SETOEQ:
+  case ISD::SETUEQ:
+  case ISD::SETNE:
+  case ISD::SETONE:
+  case ISD::SETUNE:
+  case ISD::SETO:
+  case ISD::SETUO:
+    IsAlwaysSignaling = false;
+    break;
+  }
+
   return SSECC;
 }
 
@@ -20769,12 +20804,16 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
 
 static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
-  SDValue Op0 = Op.getOperand(0);
-  SDValue Op1 = Op.getOperand(1);
-  SDValue CC = Op.getOperand(2);
-  MVT VT = Op.getSimpleValueType();
+  bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
+                  Op.getOpcode() == ISD::STRICT_FSETCCS;
+  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
+  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
+  SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
+  SDValue CC = Op.getOperand(IsStrict ? 3 : 2);
+  MVT VT = Op->getSimpleValueType(0);
   ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
-  bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
+  bool isFP = Op1.getSimpleValueType().isFloatingPoint();
   SDLoc dl(Op);
 
   if (isFP) {
@@ -20795,34 +20834,74 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
       VT = Op0.getSimpleValueType();
     }
 
-    // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
-    // emit two comparisons and a logic op to tie them together.
     SDValue Cmp;
-    unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
-    if (SSECC >= 8 && !Subtarget.hasAVX()) {
-      // LLVM predicate is SETUEQ or SETONE.
-      unsigned CC0, CC1;
-      unsigned CombineOpc;
-      if (Cond == ISD::SETUEQ) {
-        CC0 = 3; // UNORD
-        CC1 = 0; // EQ
-        CombineOpc = X86ISD::FOR;
+    bool IsAlwaysSignaling;
+    unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1, IsAlwaysSignaling);
+    if (!Subtarget.hasAVX()) {
+      // TODO: We could use following steps to handle a quiet compare with
+      // signaling encodings.
+      // 1. Get ordered masks from a quiet ISD::SETO
+      // 2. Use the masks to mask potential unordered elements in operand A, B
+      // 3. Get the compare results of masked A, B
+      // 4. Calculating final result using the mask and result from 3
+      // But currently, we just fall back to scalar operations.
+      if (IsStrict && IsAlwaysSignaling && !IsSignaling)
+        return SDValue();
+
+      // Insert an extra signaling instruction to raise exception.
+      if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
+        SDValue SignalCmp = DAG.getNode(
+            Opc, dl, {VT, MVT::Other},
+            {Chain, Op0, Op1, DAG.getTargetConstant(1, dl, MVT::i8)}); // LT_OS
+        // FIXME: It seems we need to update the flags of all new strict nodes.
+        // Otherwise, mayRaiseFPException in MI will return false due to
+        // NoFPExcept = false by default. However, I didn't find it in other
+        // patches.
+        SignalCmp->setFlags(Op->getFlags());
+        Chain = SignalCmp.getValue(1);
+      }
+
+      // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE),
+      // emit two comparisons and a logic op to tie them together.
+      if (SSECC >= 8) {
+        // LLVM predicate is SETUEQ or SETONE.
+        unsigned CC0, CC1;
+        unsigned CombineOpc;
+        if (Cond == ISD::SETUEQ) {
+          CC0 = 3; // UNORD
+          CC1 = 0; // EQ
+          CombineOpc = X86ISD::FOR;
+        } else {
+          assert(Cond == ISD::SETONE);
+          CC0 = 7; // ORD
+          CC1 = 4; // NEQ
+          CombineOpc = X86ISD::FAND;
+        }
+
+        SDValue Cmp0 = DAG.getNode(
+            Opc, dl, {VT, MVT::Other},
+            {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)});
+        SDValue Cmp1 = DAG.getNode(
+            Opc, dl, {VT, MVT::Other},
+            {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
+                            Cmp1.getValue(1));
+        Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
       } else {
-        assert(Cond == ISD::SETONE);
-        CC0 = 7; // ORD
-        CC1 = 4; // NEQ
-        CombineOpc = X86ISD::FAND;
+        Cmp = DAG.getNode(
+            Opc, dl, {VT, MVT::Other},
+            {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
+        Chain = Cmp.getValue(1);
       }
-
-      SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
-                                 DAG.getTargetConstant(CC0, dl, MVT::i8));
-      SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
-                                 DAG.getTargetConstant(CC1, dl, MVT::i8));
-      Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
     } else {
       // Handle all other FP comparisons here.
-      Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
-                        DAG.getTargetConstant(SSECC, dl, MVT::i8));
+      if (IsStrict)
+        // Make a flip on already signaling CCs before setting bit 4 of AVX CC.
+        SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
+      Cmp = DAG.getNode(
+          Opc, dl, {VT, MVT::Other},
+          {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
+      Chain = Cmp.getValue(1);
     }
 
     // If this is SSE/AVX CMPP, bitcast the result back to integer to match the
@@ -20831,9 +20910,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     if (Opc == X86ISD::CMPP)
       Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
 
+    if (IsStrict)
+      return DAG.getMergeValues({Cmp, Chain}, dl);
+
     return Cmp;
   }
 
+  assert(!IsStrict && "Strict SETCC only handles FP operands.");
+
   MVT VTOp0 = Op0.getSimpleValueType();
   (void)VTOp0;
   assert(VTOp0 == Op1.getSimpleValueType() &&
@@ -21143,8 +21227,9 @@ static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
 /// corresponding X86 condition code constant in X86CC.
 SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                                              ISD::CondCode CC, const SDLoc &dl,
-                                             SelectionDAG &DAG,
-                                             SDValue &X86CC) const {
+                                             SelectionDAG &DAG, SDValue &X86CC,
+                                             SDValue &Chain,
+                                             bool IsSignaling) const {
   // Optimize to BT if possible.
   // Lower (X & (1 << N)) == 0 to BT(X, N).
   // Lower ((X >>u N) & 1) != 0 to BT(X, N).
@@ -21192,7 +21277,9 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
   if (CondCode == X86::COND_INVALID)
     return SDValue();
 
-  SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG);
+  SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Chain, IsSignaling);
+  if (Chain)
+    Chain = EFLAGS.getValue(1);
   EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
   X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
   return EFLAGS;
@@ -21200,19 +21287,26 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
 
 SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 
-  MVT VT = Op.getSimpleValueType();
+  bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
+                  Op.getOpcode() == ISD::STRICT_FSETCCS;
+  MVT VT = Op->getSimpleValueType(0);
 
   if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
 
   assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
-  SDValue Op0 = Op.getOperand(0);
-  SDValue Op1 = Op.getOperand(1);
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
+  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
+  SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
   SDLoc dl(Op);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  ISD::CondCode CC =
+      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
 
   // Handle f128 first, since one possible outcome is a normal integer
   // comparison which gets handled by emitFlagsForSetcc.
   if (Op0.getValueType() == MVT::f128) {
+    // FIXME: We may need a strict version of softenSetCCOperands before
+    // supporting f128.
+    assert(!IsStrict && "Unhandled strict operation!");
     softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1);
 
     // If softenSetCCOperands returned a scalar, use it.
@@ -21224,11 +21318,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   }
 
   SDValue X86CC;
-  SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
+  SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC, Chain,
+                                     Op.getOpcode() == ISD::STRICT_FSETCCS);
   if (!EFLAGS)
     return SDValue();
 
-  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+  SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
+
+  if (IsStrict)
+    return DAG.getMergeValues({Res, Chain}, dl);
+
+  return Res;
 }
 
 SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
@@ -21359,8 +21459,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
        (Subtarget.hasSSE1() && VT == MVT::f32)) &&
       VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
     SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
-    unsigned SSECC = translateX86FSETCC(
-        cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
+    bool IsAlwaysSignaling;
+    unsigned SSECC =
+        translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(),
+                           CondOp0, CondOp1, IsAlwaysSignaling);
 
     if (Subtarget.hasAVX512()) {
       SDValue Cmp =
@@ -21449,7 +21551,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
-  if (Cond.getOpcode() == ISD::SETCC) {
+  if (Cond.getOpcode() == ISD::SETCC ||
+      Cond.getOpcode() == ISD::STRICT_FSETCC ||
+      Cond.getOpcode() == ISD::STRICT_FSETCCS) {
     if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
       Cond = NewCond;
       // If the condition was updated, it's possible that the operands of the
@@ -22924,6 +23028,24 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   unsigned IntNo = Op.getConstantOperandVal(0);
   MVT VT = Op.getSimpleValueType();
   const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
+
+  // We share some nodes between STRICT and non STRICT FP intrinsics.
+  // For these nodes, we need chain them to entry token if they are not called
+  // by STRICT FP intrinsics.
+  auto getProperNode = [&](unsigned Opcode, EVT VT, ArrayRef<SDValue> Ops) {
+    switch (Opcode) {
+    default:
+      return DAG.getNode(Opcode, dl, VT, Ops);
+    case X86ISD::CMPP:
+    case X86ISD::CMPM:
+      break;
+    }
+
+    SmallVector<SDValue, 6> NewOps = {DAG.getEntryNode()};
+    NewOps.append(Ops.begin(), Ops.end());
+    return DAG.getNode(Opcode, dl, {VT, MVT::Other}, NewOps);
+  };
+
   if (IntrData) {
     switch(IntrData->Type) {
     case INTR_TYPE_1OP: {
@@ -23012,8 +23134,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
           return SDValue();
       }
 
-      return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
-                         Src1, Src2, Src3);
+      return getProperNode(IntrData->Opc0, Op.getValueType(),
+                           {Src1, Src2, Src3});
     }
     case INTR_TYPE_4OP:
       return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
@@ -23261,8 +23383,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
           return SDValue();
       }
       //default rounding mode
-      return DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1),
-                          Op.getOperand(2), CC);
+      return getProperNode(IntrData->Opc0, MaskVT,
+                           {Op.getOperand(1), Op.getOperand(2), CC});
     }
     case CMP_MASK_SCALAR_CC: {
       SDValue Src1 = Op.getOperand(1);
@@ -27902,7 +28024,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
-  case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  case ISD::SETCC:
+  case ISD::STRICT_FSETCC:
+  case ISD::STRICT_FSETCCS:     return LowerSETCC(Op, DAG);
   case ISD::SETCCCARRY:         return LowerSETCCCARRY(Op, DAG);
   case ISD::SELECT:             return LowerSELECT(Op, DAG);
   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
@@ -28804,6 +28928,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CALL:               return "X86ISD::CALL";
   case X86ISD::BT:                 return "X86ISD::BT";
   case X86ISD::CMP:                return "X86ISD::CMP";
+  case X86ISD::STRICT_FCMP:        return "X86ISD::STRICT_FCMP";
+  case X86ISD::STRICT_FCMPS:       return "X86ISD::STRICT_FCMPS";
   case X86ISD::COMI:               return "X86ISD::COMI";
   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
   case X86ISD::CMPM:               return "X86ISD::CMPM";