summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/docs/LangRef.rst107
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h6
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h2
-rw-r--r--llvm/include/llvm/IR/ConstrainedOps.def11
-rw-r--r--llvm/include/llvm/IR/IntrinsicInst.h19
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td12
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp21
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp22
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp47
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp14
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp14
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp2
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp24
-rw-r--r--llvm/lib/IR/Verifier.cpp11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZElimCompare.cpp23
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp210
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h22
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFP.td20
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrVector.td38
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td35
-rw-r--r--llvm/lib/Target/SystemZ/SystemZPatterns.td4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZShortenInst.cpp8
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll435
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll249
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll47
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll524
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll103
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll44
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll436
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll249
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll48
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll148
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll103
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll44
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll560
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll442
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll442
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll442
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll442
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll56
41 files changed, 5394 insertions, 100 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index aa37e00b505..ee94ac57bd1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15629,6 +15629,113 @@ The result produced is a floating point value extended to be larger in size
than the operand. All restrictions that apply to the fpext instruction also
apply to this intrinsic.
+'``llvm.experimental.constrained.fcmp``' and '``llvm.experimental.constrained.fcmps``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <ty2>
+ @llvm.experimental.constrained.fcmp(<type> <op1>, <type> <op2>,
+ metadata <condition code>,
+ metadata <exception behavior>)
+ declare <ty2>
+ @llvm.experimental.constrained.fcmps(<type> <op1>, <type> <op2>,
+ metadata <condition code>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fcmp``' and
+'``llvm.experimental.constrained.fcmps``' intrinsics return a boolean
+value or vector of boolean values based on comparison of its operands.
+
+If the operands are floating-point scalars, then the result type is a
+boolean (:ref:`i1 <t_integer>`).
+
+If the operands are floating-point vectors, then the result type is a
+vector of boolean with the same number of elements as the operands being
+compared.
+
+The '``llvm.experimental.constrained.fcmp``' intrinsic performs a quiet
+comparison operation while the '``llvm.experimental.constrained.fcmps``'
+intrinsic performs a signaling comparison operation.
+
+Arguments:
+""""""""""
+
+The first two arguments to the '``llvm.experimental.constrained.fcmp``'
+and '``llvm.experimental.constrained.fcmps``' intrinsics must be
+:ref:`floating-point <t_floating>` or :ref:`vector <t_vector>`
+of floating-point values. Both arguments must have identical types.
+
+The third argument is the condition code indicating the kind of comparison
+to perform. It must be a metadata string with one of the following values:
+
+- "``oeq``": ordered and equal
+- "``ogt``": ordered and greater than
+- "``oge``": ordered and greater than or equal
+- "``olt``": ordered and less than
+- "``ole``": ordered and less than or equal
+- "``one``": ordered and not equal
+- "``ord``": ordered (no nans)
+- "``ueq``": unordered or equal
+- "``ugt``": unordered or greater than
+- "``uge``": unordered or greater than or equal
+- "``ult``": unordered or less than
+- "``ule``": unordered or less than or equal
+- "``une``": unordered or not equal
+- "``uno``": unordered (either nans)
+
+*Ordered* means that neither operand is a NAN while *unordered* means
+that either operand may be a NAN.
+
+The fourth argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+``op1`` and ``op2`` are compared according to the condition code given
+as the third argument. If the operands are vectors, then the
+vectors are compared element by element. Each comparison performed
+always yields an :ref:`i1 <t_integer>` result, as follows:
+
+- "``oeq``": yields ``true`` if both operands are not a NAN and ``op1``
+ is equal to ``op2``.
+- "``ogt``": yields ``true`` if both operands are not a NAN and ``op1``
+ is greater than ``op2``.
+- "``oge``": yields ``true`` if both operands are not a NAN and ``op1``
+ is greater than or equal to ``op2``.
+- "``olt``": yields ``true`` if both operands are not a NAN and ``op1``
+ is less than ``op2``.
+- "``ole``": yields ``true`` if both operands are not a NAN and ``op1``
+ is less than or equal to ``op2``.
+- "``one``": yields ``true`` if both operands are not a NAN and ``op1``
+ is not equal to ``op2``.
+- "``ord``": yields ``true`` if both operands are not a NAN.
+- "``ueq``": yields ``true`` if either operand is a NAN or ``op1`` is
+ equal to ``op2``.
+- "``ugt``": yields ``true`` if either operand is a NAN or ``op1`` is
+ greater than ``op2``.
+- "``uge``": yields ``true`` if either operand is a NAN or ``op1`` is
+ greater than or equal to ``op2``.
+- "``ult``": yields ``true`` if either operand is a NAN or ``op1`` is
+ less than ``op2``.
+- "``ule``": yields ``true`` if either operand is a NAN or ``op1`` is
+ less than or equal to ``op2``.
+- "``une``": yields ``true`` if either operand is a NAN or ``op1`` is
+ not equal to ``op2``.
+- "``uno``": yields ``true`` if either operand is a NAN.
+
+The quiet comparison operation performed by
+'``llvm.experimental.constrained.fcmp``' will only raise an exception
+if either operand is a SNAN. The signaling comparison operation
+performed by '``llvm.experimental.constrained.fcmps``' will raise an
+exception if either operand is a NAN (QNAN or SNAN).
+
Constrained libm-equivalent Intrinsics
--------------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 658ad31fa2a..a9f601ede65 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -330,6 +330,12 @@ namespace ISD {
/// It is used to limit optimizations while the DAG is being optimized.
STRICT_FP_EXTEND,
+ /// STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used
+ /// for floating-point operands only. STRICT_FSETCC performs a quiet
+ /// comparison operation, while STRICT_FSETCCS performs a signaling
+ /// comparison operation.
+ STRICT_FSETCC, STRICT_FSETCCS,
+
/// FMA - Perform a * b + c with no intermediate rounding step.
FMA,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index db00090a5d2..0726bdfec20 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -951,6 +951,8 @@ public:
default: llvm_unreachable("Unexpected FP pseudo-opcode");
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
#include "llvm/IR/ConstrainedOps.def"
}
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index d27b3040e18..062cf479551 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -20,6 +20,11 @@
#define FUNCTION INSTRUCTION
#endif
+// Likewise for compare instructions.
+#ifndef CMP_INSTRUCTION
+#define CMP_INSTRUCTION INSTRUCTION
+#endif
+
// Arguments of the entries are:
// - instruction or intrinsic function name.
// - Number of original instruction/intrinsic arguments.
@@ -40,6 +45,11 @@ INSTRUCTION(FPToSI, 1, 0, experimental_constrained_fptosi, FP_TO_SINT)
INSTRUCTION(FPToUI, 1, 0, experimental_constrained_fptoui, FP_TO_UINT)
INSTRUCTION(FPTrunc, 1, 1, experimental_constrained_fptrunc, FP_ROUND)
+// These are definitions for compare instructions (signaling and quiet versions).
+// Both of these match to FCmp / SETCC.
+CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmp, FSETCC)
+CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS)
+
// These are definitions for intrinsic functions, that are converted into
// constrained intrinsics.
//
@@ -69,3 +79,4 @@ FUNCTION(trunc, 1, 1, experimental_constrained_trunc, FTRUNC)
#undef INSTRUCTION
#undef FUNCTION
+#undef CMP_INSTRUCTION
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index c9322f9c14c..531310530a0 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -221,6 +221,25 @@ namespace llvm {
}
};
+ /// Constrained floating point compare intrinsics.
+ class ConstrainedFPCmpIntrinsic : public ConstrainedFPIntrinsic {
+ public:
+ FCmpInst::Predicate getPredicate() const;
+
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::experimental_constrained_fcmp:
+ case Intrinsic::experimental_constrained_fcmps:
+ return true;
+ default: return false;
+ }
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
/// This class represents an intrinsic that is based on a binary operation.
/// This includes op.with.overflow and saturating add/sub intrinsics.
class BinaryOpIntrinsic : public IntrinsicInst {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index cc29a9c9bbc..16d689e9b54 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -743,8 +743,18 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+
+ // Constrained floating-point comparison (quiet and signaling variants).
+ // Third operand is the predicate represented as a metadata string.
+ def int_experimental_constrained_fcmp
+ : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyfloat_ty, LLVMMatchType<0>,
+ llvm_metadata_ty, llvm_metadata_ty ]>;
+ def int_experimental_constrained_fcmps
+ : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyfloat_ty, LLVMMatchType<0>,
+ llvm_metadata_ty, llvm_metadata_ty ]>;
}
-// FIXME: Add intrinsic for fcmp.
// FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
//===------------------------- Expect Intrinsics --------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 70cb20e48d2..b3860dda8e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1036,11 +1036,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(2).getValueType());
break;
case ISD::SELECT_CC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::STRICT_FSETCC ? 3 :
+ Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 :
Node->getOpcode() == ISD::SETCC ? 2 : 1;
- unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 :
+ Node->getOpcode() == ISD::STRICT_FSETCC ? 1 :
+ Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index dd082646ae5..9e4e5adc0b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -75,6 +75,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
@@ -817,7 +819,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
- EVT InVT = N->getOperand(0).getValueType();
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT SVT = getSetCCResultType(InVT);
@@ -836,12 +839,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
}
SDLoc dl(N);
- assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() &&
"Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
- N->getOperand(1), N->getOperand(2));
+ SDValue SetCC;
+ if (N->isStrictFPOpcode()) {
+ EVT VTs[] = {SVT, MVT::Other};
+ SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3)};
+ SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
+ } else
+ SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2770d512c78..1ef62921ab8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1323,7 +1323,14 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumOpers = Op.getNumOperands();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT ValueVTs[] = {EltVT, MVT::Other};
+
+ EVT TmpEltVT = EltVT;
+ if (Op->getOpcode() == ISD::STRICT_FSETCC ||
+ Op->getOpcode() == ISD::STRICT_FSETCCS)
+ TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT);
+
+ EVT ValueVTs[] = {TmpEltVT, MVT::Other};
SDValue Chain = Op.getOperand(0);
SDLoc dl(Op);
@@ -1350,9 +1357,18 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
}
SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+ SDValue ScalarResult = ScalarOp.getValue(0);
+ SDValue ScalarChain = ScalarOp.getValue(1);
+
+ if (Op->getOpcode() == ISD::STRICT_FSETCC ||
+ Op->getOpcode() == ISD::STRICT_FSETCCS)
+ ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
+ DAG.getConstant(APInt::getAllOnesValue
+ (EltVT.getSizeInBits()), dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
- OpValues.push_back(ScalarOp.getValue(0));
- OpChains.push_back(ScalarOp.getValue(1));
+ OpValues.push_back(ScalarResult);
+ OpChains.push_back(ScalarChain);
}
SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9403b344ea7..4090ee5aa13 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3758,6 +3758,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+// Return true if this is a SETCC node or a strict version of it.
+static inline bool isSETCCOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return true;
+ }
+ return false;
+}
+
// Return true if this is a node that could have two SETCCs as operands.
static inline bool isLogicalMaskOp(unsigned Opcode) {
switch (Opcode) {
@@ -3769,6 +3780,13 @@ static inline bool isLogicalMaskOp(unsigned Opcode) {
return false;
}
+// If N is a SETCC or a strict variant of it, return the type
+// of the compare operands.
+static inline EVT getSETCCOperandType(SDValue N) {
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ return N->getOperand(OpNo).getValueType();
+}
+
// This is used just for the assert in convertMask(). Check that this either
// a SETCC or a previously handled SETCC by convertMask().
#ifndef NDEBUG
@@ -3791,7 +3809,7 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
return isSETCCorConvertedSETCC(N.getOperand(0)) &&
isSETCCorConvertedSETCC(N.getOperand(1));
- return (N.getOpcode() == ISD::SETCC ||
+ return (isSETCCOp(N.getOpcode()) ||
ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif
@@ -3806,10 +3824,17 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
// Make a new Mask node, with a legal result VT.
+ SDValue Mask;
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
Ops.push_back(InMask->getOperand(i));
- SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+ if (InMask->isStrictFPOpcode()) {
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
+ { MaskVT, MVT::Other }, Ops);
+ ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
+ }
+ else
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
// If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
// extend or truncate is needed.
@@ -3862,7 +3887,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
- if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
return SDValue();
// If this is a splitted VSELECT that was previously already handled, do
@@ -3885,8 +3910,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
return SDValue();
// If there is support for an i1 vector mask, don't touch.
- if (Cond.getOpcode() == ISD::SETCC) {
- EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ if (isSETCCOp(Cond.getOpcode())) {
+ EVT SetCCOpVT = getSETCCOperandType(Cond);
while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
@@ -3917,17 +3942,17 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
SDValue Mask;
- if (Cond->getOpcode() == ISD::SETCC) {
- EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
+ if (isSETCCOp(Cond->getOpcode())) {
+ EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond));
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
- Cond->getOperand(0).getOpcode() == ISD::SETCC &&
- Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ isSETCCOp(Cond->getOperand(0).getOpcode()) &&
+ isSETCCOp(Cond->getOperand(1).getOpcode())) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
- EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
- EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
+ EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0));
+ EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1));
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c1c599c5a5d..9ca51e72ec7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2798,12 +2798,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
+ }
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
@@ -3662,11 +3666,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits;
break;
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
+ }
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -7833,6 +7841,8 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
#include "llvm/IR/ConstrainedOps.def"
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9952d4d9ac9..de4690d657e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6915,9 +6915,21 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
#include "llvm/IR/ConstrainedOps.def"
}
- if (Opcode == ISD::STRICT_FP_ROUND)
+ // A few strict DAG nodes carry additional operands that are not
+ // set up by the default code above.
+ switch (Opcode) {
+ default: break;
+ case ISD::STRICT_FP_ROUND:
Opers.push_back(
DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
+ break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
+ Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate())));
+ break;
+ }
+ }
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index f863d987648..8312a9a1d64 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -270,6 +270,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
case ISD::SETCCCARRY: return "setcccarry";
+ case ISD::STRICT_FSETCC: return "strict_fsetcc";
+ case ISD::STRICT_FSETCCS: return "strict_fsetccs";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 12b1bf1dce0..b23742b83c1 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -121,6 +121,30 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const {
return StrToExceptionBehavior(cast<MDString>(MD)->getString());
}
+FCmpInst::Predicate
+ConstrainedFPCmpIntrinsic::getPredicate() const {
+ Metadata *MD =
+ cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
+ if (!MD || !isa<MDString>(MD))
+ return FCmpInst::BAD_FCMP_PREDICATE;
+ return StringSwitch<FCmpInst::Predicate>(cast<MDString>(MD)->getString())
+ .Case("oeq", FCmpInst::FCMP_OEQ)
+ .Case("ogt", FCmpInst::FCMP_OGT)
+ .Case("oge", FCmpInst::FCMP_OGE)
+ .Case("olt", FCmpInst::FCMP_OLT)
+ .Case("ole", FCmpInst::FCMP_OLE)
+ .Case("one", FCmpInst::FCMP_ONE)
+ .Case("ord", FCmpInst::FCMP_ORD)
+ .Case("uno", FCmpInst::FCMP_UNO)
+ .Case("ueq", FCmpInst::FCMP_UEQ)
+ .Case("ugt", FCmpInst::FCMP_UGT)
+ .Case("uge", FCmpInst::FCMP_UGE)
+ .Case("ult", FCmpInst::FCMP_ULT)
+ .Case("ule", FCmpInst::FCMP_ULE)
+ .Case("une", FCmpInst::FCMP_UNE)
+ .Default(FCmpInst::BAD_FCMP_PREDICATE);
+}
+
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
default:
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index b32b289096a..876865edde0 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4740,6 +4740,9 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
llvm_unreachable("Invalid constrained FP intrinsic!");
}
NumOperands += (1 + HasRoundingMD);
+ // Compare intrinsics carry an extra predicate metadata operand.
+ if (isa<ConstrainedFPCmpIntrinsic>(FPI))
+ NumOperands += 1;
Assert((FPI.getNumArgOperands() == NumOperands),
"invalid arguments for constrained FP intrinsic", &FPI);
@@ -4762,6 +4765,14 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
break;
}
+ case Intrinsic::experimental_constrained_fcmp:
+ case Intrinsic::experimental_constrained_fcmps: {
+ auto Pred = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI)->getPredicate();
+ Assert(CmpInst::isFPPredicate(Pred),
+ "invalid predicate for constrained FP comparison intrinsic", &FPI);
+ break;
+ }
+
case Intrinsic::experimental_constrained_fptosi:
case Intrinsic::experimental_constrained_fptoui: {
Value *Operand = FPI.getArgOperand(0);
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 90f664a7f79..8b128084181 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -295,6 +295,11 @@ bool SystemZElimCompare::convertToLoadAndTest(
MIB.setMemRefs(MI.memoperands());
MI.eraseFromParent();
+ // Mark instruction as raising an FP exception if applicable. We already
+ // verified earlier that this move is valid.
+ if (Compare.mayRaiseFPException())
+ MIB.setMIFlag(MachineInstr::MIFlag::FPExcept);
+
return true;
}
@@ -312,6 +317,18 @@ bool SystemZElimCompare::adjustCCMasksForInstr(
const MCInstrDesc &Desc = TII->get(Opcode);
unsigned MIFlags = Desc.TSFlags;
+ // If Compare may raise an FP exception, we can only eliminate it
+ // if MI itself would have already raised the exception.
+ if (Compare.mayRaiseFPException()) {
+ // If the caller will change MI to use ConvOpc, only test whether
+ // ConvOpc is suitable; it is on the caller to set the MI flag.
+ if (ConvOpc && !Desc.mayRaiseFPException())
+ return false;
+ // If the caller will not change MI, we test the MI flag here.
+ if (!ConvOpc && !MI.mayRaiseFPException())
+ return false;
+ }
+
// See which compare-style condition codes are available.
unsigned ReusableCCMask = SystemZII::getCompareZeroCCMask(MIFlags);
@@ -454,6 +471,12 @@ bool SystemZElimCompare::optimizeCompareZero(
CCRefs |= getRegReferences(MI, SystemZ::CC);
if (CCRefs.Use && CCRefs.Def)
break;
+ // Eliminating a Compare that may raise an FP exception will move
+ // raising the exception to some earlier MI. We cannot do this if
+ // there is anything in between that might change exception flags.
+ if (Compare.mayRaiseFPException() &&
+ (MI.isCall() || MI.hasUnmodeledSideEffects()))
+ break;
}
// Also do a forward search to handle cases where an instruction after the
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 7994176c4c2..0e58b526397 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -32,12 +32,16 @@ using namespace llvm;
namespace {
// Represents information about a comparison.
struct Comparison {
- Comparison(SDValue Op0In, SDValue Op1In)
- : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
+ Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
+ : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
+ Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
// The operands to the comparison.
SDValue Op0, Op1;
+ // Chain if this is a strict floating-point comparison.
+ SDValue Chain;
+
// The opcode that should be used to compare Op0 and Op1.
unsigned Opcode;
@@ -132,6 +136,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
if (isTypeLegal(VT)) {
// Lower SET_CC into an IPM-based sequence.
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
// Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
setOperationAction(ISD::SELECT, VT, Expand);
@@ -373,6 +379,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ if (Subtarget.hasVectorEnhancements1())
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
}
@@ -2168,6 +2177,10 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
+ // This optimization is invalid for strict comparisons, since FNEG
+ // does not raise any exceptions.
+ if (C.Chain)
+ return;
auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
if (C1 && C1->isZero()) {
for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
@@ -2455,7 +2468,7 @@ static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
SDValue Call, unsigned CCValid, uint64_t CC,
ISD::CondCode Cond) {
- Comparison C(Call, SDValue());
+ Comparison C(Call, SDValue(), SDValue());
C.Opcode = Opcode;
C.CCValid = CCValid;
if (Cond == ISD::SETEQ)
@@ -2486,8 +2499,11 @@ static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
- ISD::CondCode Cond, const SDLoc &DL) {
+ ISD::CondCode Cond, const SDLoc &DL,
+ SDValue Chain = SDValue(),
+ bool IsSignaling = false) {
if (CmpOp1.getOpcode() == ISD::Constant) {
+ assert(!Chain);
uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
unsigned Opcode, CCValid;
if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
@@ -2499,13 +2515,19 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
}
- Comparison C(CmpOp0, CmpOp1);
+ Comparison C(CmpOp0, CmpOp1, Chain);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
C.CCValid = SystemZ::CCMASK_FCMP;
- C.Opcode = SystemZISD::FCMP;
+ if (!C.Chain)
+ C.Opcode = SystemZISD::FCMP;
+ else if (!IsSignaling)
+ C.Opcode = SystemZISD::STRICT_FCMP;
+ else
+ C.Opcode = SystemZISD::STRICT_FCMPS;
adjustForFNeg(C);
} else {
+ assert(!C.Chain);
C.CCValid = SystemZ::CCMASK_ICMP;
C.Opcode = SystemZISD::ICMP;
// Choose the type of comparison. Equality and inequality tests can
@@ -2563,6 +2585,10 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
}
+ if (C.Chain) {
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
+ }
return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}
@@ -2607,24 +2633,51 @@ static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
}
// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
-// be done directly. IsFP is true if CC is for a floating-point rather than
-// integer comparison.
-static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
+// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
+// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
+// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
+// floating-point comparisons.
+enum class CmpMode { Int, FP, StrictFP, SignalingFP };
+static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
switch (CC) {
case ISD::SETOEQ:
case ISD::SETEQ:
- return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
+ switch (Mode) {
+ case CmpMode::Int: return SystemZISD::VICMPE;
+ case CmpMode::FP: return SystemZISD::VFCMPE;
+ case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
+ case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
+ default: llvm_unreachable("Bad mode");
+ }
case ISD::SETOGE:
case ISD::SETGE:
- return IsFP ? SystemZISD::VFCMPHE : static_cast<SystemZISD::NodeType>(0);
+ switch (Mode) {
+ case CmpMode::Int: return 0;
+ case CmpMode::FP: return SystemZISD::VFCMPHE;
+ case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
+ case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
+ default: llvm_unreachable("Bad mode");
+ }
case ISD::SETOGT:
case ISD::SETGT:
- return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
+ switch (Mode) {
+ case CmpMode::Int: return SystemZISD::VICMPH;
+ case CmpMode::FP: return SystemZISD::VFCMPH;
+ case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
+ case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
+ default: llvm_unreachable("Bad mode");
+ }
case ISD::SETUGT:
- return IsFP ? static_cast<SystemZISD::NodeType>(0) : SystemZISD::VICMPHL;
+ switch (Mode) {
+ case CmpMode::Int: return SystemZISD::VICMPHL;
+ case CmpMode::FP: return 0;
+ case CmpMode::StrictFP: return 0;
+ case CmpMode::SignalingFP: return 0;
+ default: llvm_unreachable("Bad mode");
+ }
default:
return 0;
@@ -2633,17 +2686,16 @@ static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly. Indicate in Invert whether the
-// result is for the inverse of CC. IsFP is true if CC is for a
-// floating-point rather than integer comparison.
-static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
+// result is for the inverse of CC. Mode is as above.
+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
bool &Invert) {
- if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ if (unsigned Opcode = getVectorComparison(CC, Mode)) {
Invert = false;
return Opcode;
}
- CC = ISD::getSetCCInverse(CC, !IsFP);
- if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
+ CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int);
+ if (unsigned Opcode = getVectorComparison(CC, Mode)) {
Invert = true;
return Opcode;
}
@@ -2652,44 +2704,73 @@ static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
}
// Return a v2f64 that contains the extended form of elements Start and Start+1
-// of v4f32 value Op.
+// of v4f32 value Op. If Chain is nonnull, return the strict form.
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
- SDValue Op) {
+ SDValue Op, SDValue Chain) {
int Mask[] = { Start, -1, Start + 1, -1 };
Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
+ if (Chain) {
+ SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
+ return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
+ }
return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
}
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
-// producing a result of type VT.
+// producing a result of type VT. If Chain is nonnull, return the strict form.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
const SDLoc &DL, EVT VT,
SDValue CmpOp0,
- SDValue CmpOp1) const {
+ SDValue CmpOp1,
+ SDValue Chain) const {
// There is no hardware support for v4f32 (unless we have the vector
// enhancements facility 1), so extend the vector into two v2f64s
// and compare those.
if (CmpOp0.getValueType() == MVT::v4f32 &&
!Subtarget.hasVectorEnhancements1()) {
- SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
- SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
- SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
- SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
+ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
+ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
+ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
+ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
+ if (Chain) {
+ SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
+ SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
+ SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
+ SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
+ H1.getValue(1), L1.getValue(1),
+ HRes.getValue(1), LRes.getValue(1) };
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ SDValue Ops[2] = { Res, NewChain };
+ return DAG.getMergeValues(Ops, DL);
+ }
SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
}
+ if (Chain) {
+ SDVTList VTs = DAG.getVTList(VT, MVT::Other);
+ return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
+ }
return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
}
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
-// an integer mask of type VT.
+// an integer mask of type VT. If Chain is nonnull, we have a strict
+// floating-point comparison. If in addition IsSignaling is true, we have
+// a strict signaling floating-point comparison.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
const SDLoc &DL, EVT VT,
ISD::CondCode CC,
SDValue CmpOp0,
- SDValue CmpOp1) const {
+ SDValue CmpOp1,
+ SDValue Chain,
+ bool IsSignaling) const {
bool IsFP = CmpOp0.getValueType().isFloatingPoint();
+ assert (!Chain || IsFP);
+ assert (!IsSignaling || Chain);
+ CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
+ Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
bool Invert = false;
SDValue Cmp;
switch (CC) {
@@ -2699,9 +2780,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
LLVM_FALLTHROUGH;
case ISD::SETO: {
assert(IsFP && "Unexpected integer comparison");
- SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
- SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
+ SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+ DL, VT, CmpOp1, CmpOp0, Chain);
+ SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
+ DL, VT, CmpOp0, CmpOp1, Chain);
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ LT.getValue(1), GE.getValue(1));
break;
}
@@ -2711,9 +2797,14 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
LLVM_FALLTHROUGH;
case ISD::SETONE: {
assert(IsFP && "Unexpected integer comparison");
- SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
- SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
+ SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+ DL, VT, CmpOp1, CmpOp0, Chain);
+ SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
+ DL, VT, CmpOp0, CmpOp1, Chain);
Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ LT.getValue(1), GT.getValue(1));
break;
}
@@ -2721,15 +2812,17 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
// matter whether we try the inversion or the swap first, since
// there are no cases where both work.
default:
- if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
- Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
else {
CC = ISD::getSetCCSwappedOperands(CC);
- if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
- Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
else
llvm_unreachable("Unhandled comparison");
}
+ if (Chain)
+ Chain = Cmp.getValue(1);
break;
}
if (Invert) {
@@ -2737,6 +2830,10 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
}
+ if (Chain && Chain.getNode() != Cmp.getNode()) {
+ SDValue Ops[2] = { Cmp, Chain };
+ Cmp = DAG.getMergeValues(Ops, DL);
+ }
return Cmp;
}
@@ -2755,6 +2852,29 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
}
+SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
+ SelectionDAG &DAG,
+ bool IsSignaling) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue CmpOp0 = Op.getOperand(1);
+ SDValue CmpOp1 = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
+ SDLoc DL(Op);
+ EVT VT = Op.getNode()->getValueType(0);
+ if (VT.isVector()) {
+ SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
+ Chain, IsSignaling);
+ return Res.getValue(Op.getResNo());
+ }
+
+ Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
+ SDValue CCReg = emitCmp(DAG, DL, C);
+ CCReg->setFlags(Op->getFlags());
+ SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
+ SDValue Ops[2] = { Result, CCReg.getValue(1) };
+ return DAG.getMergeValues(Ops, DL);
+}
+
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
@@ -4966,6 +5086,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSELECT_CC(Op, DAG);
case ISD::SETCC:
return lowerSETCC(Op, DAG);
+ case ISD::STRICT_FSETCC:
+ return lowerSTRICT_FSETCC(Op, DAG, false);
+ case ISD::STRICT_FSETCCS:
+ return lowerSTRICT_FSETCC(Op, DAG, true);
case ISD::GlobalAddress:
return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
case ISD::GlobalTLSAddress:
@@ -5171,6 +5295,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(IABS);
OPCODE(ICMP);
OPCODE(FCMP);
+ OPCODE(STRICT_FCMP);
+ OPCODE(STRICT_FCMPS);
OPCODE(TM);
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
@@ -5233,13 +5359,20 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VICMPHS);
OPCODE(VICMPHLS);
OPCODE(VFCMPE);
+ OPCODE(STRICT_VFCMPE);
+ OPCODE(STRICT_VFCMPES);
OPCODE(VFCMPH);
+ OPCODE(STRICT_VFCMPH);
+ OPCODE(STRICT_VFCMPHS);
OPCODE(VFCMPHE);
+ OPCODE(STRICT_VFCMPHE);
+ OPCODE(STRICT_VFCMPHES);
OPCODE(VFCMPES);
OPCODE(VFCMPHS);
OPCODE(VFCMPHES);
OPCODE(VFTCI);
OPCODE(VEXTEND);
+ OPCODE(STRICT_VEXTEND);
OPCODE(VROUND);
OPCODE(VTM);
OPCODE(VFAE_CC);
@@ -7554,7 +7687,8 @@ MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
// Replace pseudo with a normal load-and-test that models the def as
// well.
BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
- .addReg(SrcReg);
+ .addReg(SrcReg)
+ .setMIFlags(MI.getFlags());
MI.eraseFromParent();
return MBB;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f774b8a896c..e49c47e379e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -58,7 +58,8 @@ enum NodeType : unsigned {
ICMP,
// Floating-point comparisons. The two operands are the values to compare.
- FCMP,
+ // Regular and strict (quiet and signaling) versions.
+ FCMP, STRICT_FCMP, STRICT_FCMPS,
// Test under mask. The first operand is ANDed with the second operand
// and the condition codes are set on the result. The third operand is
@@ -248,9 +249,10 @@ enum NodeType : unsigned {
// Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
// vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and
// greater than" and VFCMPHE for "ordered and greater than or equal to".
- VFCMPE,
- VFCMPH,
- VFCMPHE,
+ // Regular and strict (quiet and signaling) versions.
+ VFCMPE, STRICT_VFCMPE, STRICT_VFCMPES,
+ VFCMPH, STRICT_VFCMPH, STRICT_VFCMPHS,
+ VFCMPHE, STRICT_VFCMPHE, STRICT_VFCMPHES,
// Likewise, but also set the condition codes on the result.
VFCMPES,
@@ -261,8 +263,8 @@ enum NodeType : unsigned {
VFTCI,
// Extend the even f32 elements of vector operand 0 to produce a vector
- // of f64 elements.
- VEXTEND,
+ // of f64 elements. Regular and strict versions.
+ VEXTEND, STRICT_VEXTEND,
// Round the f64 elements of vector operand 0 to f32s and store them in the
// even elements of the result.
@@ -531,11 +533,15 @@ private:
// Implement LowerOperation for individual opcodes.
SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
const SDLoc &DL, EVT VT,
- SDValue CmpOp0, SDValue CmpOp1) const;
+ SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const;
SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL,
EVT VT, ISD::CondCode CC,
- SDValue CmpOp0, SDValue CmpOp1) const;
+ SDValue CmpOp0, SDValue CmpOp1,
+ SDValue Chain = SDValue(),
+ bool IsSignaling = false) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG,
+ bool IsSignaling) const;
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 3a185e538be..2aea5f961f6 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -537,19 +537,19 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
//===----------------------------------------------------------------------===//
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
- def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
- def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
- def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
+ def CEBR : CompareRRE<"cebr", 0xB309, z_any_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdbr", 0xB319, z_any_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxbr", 0xB349, z_any_fcmp, FP128, FP128>;
- def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
- def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
+ def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, load, 8>;
- def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>;
- def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>;
- def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>;
+ def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>;
+ def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>;
+ def KXBR : CompareRRE<"kxbr", 0xB348, z_strict_fcmps, FP128, FP128>;
- def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>;
- def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>;
+ def KEB : CompareRXE<"keb", 0xED08, z_strict_fcmps, FP32, load, 4>;
+ def KDB : CompareRXE<"kdb", 0xED18, z_strict_fcmps, FP64, load, 8>;
}
// Test Data Class.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 02364bbda5c..fe242dd6290 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1134,7 +1134,7 @@ let Predicates = [FeatureVector] in {
// Load lengthened.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
- def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_any_vextend, v128db, v128sb, 2, 0>;
def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
}
let Predicates = [FeatureVectorEnhancements1] in {
@@ -1364,32 +1364,32 @@ let Predicates = [FeatureVector] in {
// Compare scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
- def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_any_fcmp, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_fcmp, v32sb, 2>;
- def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_fcmp, v128xb, 4>;
+ def WFCSB : CompareVRRa<"wfcsb", 0xE7CB, z_any_fcmp, v32sb, 2>;
+ def WFCXB : CompareVRRa<"wfcxb", 0xE7CB, z_any_fcmp, v128xb, 4>;
}
}
// Compare and signal scalar.
let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
- def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, z_strict_fcmps, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
- def WFKSB : CompareVRRa<"wfksb", 0xE7CA, null_frag, v32sb, 2>;
- def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, null_frag, v128xb, 4>;
+ def WFKSB : CompareVRRa<"wfksb", 0xE7CA, z_strict_fcmps, v32sb, 2>;
+ def WFKXB : CompareVRRa<"wfkxb", 0xE7CA, z_strict_fcmps, v128xb, 4>;
}
}
// Compare equal.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
- defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_any_vfcmpe, z_vfcmpes,
v128g, v128db, 3, 0>;
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+ defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_any_vfcmpe, z_vfcmpes,
v128f, v128sb, 2, 0>;
defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
v32f, v32sb, 2, 8>;
@@ -1401,11 +1401,11 @@ let Predicates = [FeatureVector] in {
// Compare and signal equal.
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
- defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
+ defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, z_strict_vfcmpes, null_frag,
v128g, v128db, 3, 4>;
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 12>;
- defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, null_frag, null_frag,
+ defm VFKESB : BinaryVRRcSPair<"vfkesb", 0xE7E8, z_strict_vfcmpes, null_frag,
v128f, v128sb, 2, 4>;
defm WFKESB : BinaryVRRcSPair<"wfkesb", 0xE7E8, null_frag, null_frag,
v32f, v32sb, 2, 12>;
@@ -1416,12 +1416,12 @@ let Predicates = [FeatureVector] in {
// Compare high.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
- defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_any_vfcmph, z_vfcmphs,
v128g, v128db, 3, 0>;
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+ defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_any_vfcmph, z_vfcmphs,
v128f, v128sb, 2, 0>;
defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
v32f, v32sb, 2, 8>;
@@ -1433,11 +1433,11 @@ let Predicates = [FeatureVector] in {
// Compare and signal high.
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
- defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
+ defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, z_strict_vfcmphs, null_frag,
v128g, v128db, 3, 4>;
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 12>;
- defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, null_frag, null_frag,
+ defm VFKHSB : BinaryVRRcSPair<"vfkhsb", 0xE7EB, z_strict_vfcmphs, null_frag,
v128f, v128sb, 2, 4>;
defm WFKHSB : BinaryVRRcSPair<"wfkhsb", 0xE7EB, null_frag, null_frag,
v32f, v32sb, 2, 12>;
@@ -1448,12 +1448,12 @@ let Predicates = [FeatureVector] in {
// Compare high or equal.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
- defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_any_vfcmphe, z_vfcmphes,
v128g, v128db, 3, 0>;
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
v64g, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+ defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_any_vfcmphe, z_vfcmphes,
v128f, v128sb, 2, 0>;
defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
v32f, v32sb, 2, 8>;
@@ -1465,11 +1465,11 @@ let Predicates = [FeatureVector] in {
// Compare and signal high or equal.
let Uses = [FPC], mayRaiseFPException = 1,
Predicates = [FeatureVectorEnhancements1] in {
- defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
+ defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, z_strict_vfcmphes, null_frag,
v128g, v128db, 3, 4>;
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
v64g, v64db, 3, 12>;
- defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, null_frag, null_frag,
+ defm VFKHESB : BinaryVRRcSPair<"vfkhesb", 0xE7EA, z_strict_vfcmphes, null_frag,
v128f, v128sb, 2, 4>;
defm WFKHESB : BinaryVRRcSPair<"wfkhesb", 0xE7EA, null_frag, null_frag,
v32f, v32sb, 2, 12>;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 6fe383e64b7..0beefc4682a 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -258,6 +258,10 @@ def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
def z_iabs : SDNode<"SystemZISD::IABS", SDTIntUnaryOp, []>;
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>;
def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>;
+def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp,
+ [SDNPHasChain]>;
+def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp,
+ [SDNPHasChain]>;
def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>;
def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain]>;
@@ -328,12 +332,26 @@ def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>;
def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>;
def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>;
def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
+def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
+def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
+def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES",
+ SDT_ZVecBinaryConv, [SDNPHasChain]>;
def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>;
def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>;
def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>;
def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
+def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND",
+ SDT_ZVecUnaryConv, [SDNPHasChain]>;
def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>;
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>;
@@ -707,6 +725,23 @@ def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
+// Strict floating-point fragments.
+def z_any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_fcmp node:$lhs, node:$rhs),
+ (z_fcmp node:$lhs, node:$rhs)]>;
+def z_any_vfcmpe : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_vfcmpe node:$lhs, node:$rhs),
+ (z_vfcmpe node:$lhs, node:$rhs)]>;
+def z_any_vfcmph : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_vfcmph node:$lhs, node:$rhs),
+ (z_vfcmph node:$lhs, node:$rhs)]>;
+def z_any_vfcmphe : PatFrags<(ops node:$lhs, node:$rhs),
+ [(z_strict_vfcmphe node:$lhs, node:$rhs),
+ (z_vfcmphe node:$lhs, node:$rhs)]>;
+def z_any_vextend : PatFrags<(ops node:$src),
+ [(z_strict_vextend node:$src),
+ (z_vextend node:$src)]>;
+
// Create a unary operator that loads from memory and then performs
// the given operation on it.
class loadu<SDPatternOperator operator, SDPatternOperator load = load>
diff --git a/llvm/lib/Target/SystemZ/SystemZPatterns.td b/llvm/lib/Target/SystemZ/SystemZPatterns.td
index 65300fb4762..501a6948839 100644
--- a/llvm/lib/Target/SystemZ/SystemZPatterns.td
+++ b/llvm/lib/Target/SystemZ/SystemZPatterns.td
@@ -148,9 +148,9 @@ multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
// registers in CLS against zero. The instruction has separate R1 and R2
// operands, but they must be the same when the instruction is used like this.
multiclass CompareZeroFP<Instruction insn, RegisterOperand cls> {
- def : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+ def : Pat<(z_any_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
// The sign of the zero makes no difference.
- def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
+ def : Pat<(z_any_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
}
// Use INSN for performing binary operation OPERATION of type VT
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 2aca22c9082..1b9c8ea2759 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -313,6 +313,14 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
Changed |= shortenOn01(MI, SystemZ::CEBR);
break;
+ case SystemZ::WFKDB:
+ Changed |= shortenOn01(MI, SystemZ::KDBR);
+ break;
+
+ case SystemZ::WFKSB:
+ Changed |= shortenOn01(MI, SystemZ::KEBR);
+ break;
+
case SystemZ::VL32:
// For z13 we prefer LDE over LE to avoid partial register dependencies.
Changed |= shortenOn0(MI, SystemZ::LDE32);
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll
new file mode 100644
index 00000000000..3a815294a31
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-01.ll
@@ -0,0 +1,435 @@
+; Test 32-bit floating-point strict comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @foo()
+
+; Check comparison with registers.
+define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: cebr %f0, %f2
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the low end of the CEB range.
+define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the high end of the aligned CEB range.
+define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) #0 {
+; CHECK-LABEL: f3:
+; CHECK: ceb %f0, 4092(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r4, 4096
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) #0 {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r4, -4
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that CEB allows indices.
+define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) #0 {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r5, 2
+; CHECK: ceb %f0, 400(%r1,%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that comparisons of spilled values can use CEB rather than CEBR.
+define float @f7(float *%ptr0) #0 {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %ret = call float @foo() #0
+
+ %cmp0 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp1 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val1,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp2 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp3 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val3,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp4 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val4,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp5 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val5,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp6 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val6,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp7 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val7,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp8 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val8,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp9 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val9,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp10 = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %ret, float %val10,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+
+ %sel0 = select i1 %cmp0, float %ret, float 0.0
+ %sel1 = select i1 %cmp1, float %sel0, float 1.0
+ %sel2 = select i1 %cmp2, float %sel1, float 2.0
+ %sel3 = select i1 %cmp3, float %sel2, float 3.0
+ %sel4 = select i1 %cmp4, float %sel3, float 4.0
+ %sel5 = select i1 %cmp5, float %sel4, float 5.0
+ %sel6 = select i1 %cmp6, float %sel5, float 6.0
+ %sel7 = select i1 %cmp7, float %sel6, float 7.0
+ %sel8 = select i1 %cmp8, float %sel7, float 8.0
+ %sel9 = select i1 %cmp9, float %sel8, float 9.0
+ %sel10 = select i1 %cmp10, float %sel9, float 10.0
+
+ ret float %sel10
+}
+
+; Check comparison with zero.
+define i64 @f8(i64 %a, i64 %b, float %f) #0 {
+; CHECK-LABEL: f8:
+; CHECK: ltebr %f0, %f0
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CEB to be used,
+; first with oeq.
+define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f9:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then one.
+define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f10:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then olt.
+define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f11:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ole.
+define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f12:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bher %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then oge.
+define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f13:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bler %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnle %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ogt.
+define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f14:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ueq.
+define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f15:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnlhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrlh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then une.
+define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f16:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bner %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgre %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ult.
+define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f17:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnler %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrle %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ule.
+define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f18:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnlr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrl %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then uge.
+define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f19:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ugt.
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f20:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnher %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrhe %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll
new file mode 100644
index 00000000000..12a39951fff
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-02.ll
@@ -0,0 +1,249 @@
+; Test 64-bit floating-point strict comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @foo()
+
+; Check comparison with registers.
+define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: cdbr %f0, %f2
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the low end of the CDB range.
+define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the high end of the aligned CDB range.
+define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) #0 {
+; CHECK-LABEL: f3:
+; CHECK: cdb %f0, 4088(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r4, 4096
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) #0 {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r4, -8
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that CDB allows indices.
+define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) #0 {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r5, 3
+; CHECK: cdb %f0, 800(%r1,%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that comparisons of spilled values can use CDB rather than CDBR.
+define double @f7(double *%ptr0) #0 {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %ret = call double @foo() #0
+
+ %cmp0 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp1 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val1,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp2 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp3 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val3,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp4 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val4,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp5 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val5,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp6 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val6,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp7 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val7,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp8 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val8,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp9 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val9,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp10 = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %ret, double %val10,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+
+ %sel0 = select i1 %cmp0, double %ret, double 0.0
+ %sel1 = select i1 %cmp1, double %sel0, double 1.0
+ %sel2 = select i1 %cmp2, double %sel1, double 2.0
+ %sel3 = select i1 %cmp3, double %sel2, double 3.0
+ %sel4 = select i1 %cmp4, double %sel3, double 4.0
+ %sel5 = select i1 %cmp5, double %sel4, double 5.0
+ %sel6 = select i1 %cmp6, double %sel5, double 6.0
+ %sel7 = select i1 %cmp7, double %sel6, double 7.0
+ %sel8 = select i1 %cmp8, double %sel7, double 8.0
+ %sel9 = select i1 %cmp9, double %sel8, double 9.0
+ %sel10 = select i1 %cmp10, double %sel9, double 10.0
+
+ ret double %sel10
+}
+
+; Check comparison with zero.
+define i64 @f8(i64 %a, i64 %b, double %f) #0 {
+; CHECK-LABEL: f8:
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: ltdbr %f0, %f0
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows CDB to be used.
+define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) #0 {
+; CHECK-LABEL: f9:
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
+; CHECK: br %r14
+ %f1 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll
new file mode 100644
index 00000000000..ae94572eb16
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-03.ll
@@ -0,0 +1,47 @@
+; Test 128-bit floating-point strict comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r4)
+; CHECK-DAG: ld %f3, 8(%r4)
+; CHECK: cxbr %f1, %f0
+; CHECK-NEXT: ber %r14
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+ %f2x = fpext float %f2 to fp128
+ %f1 = load fp128, fp128 *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
+ fp128 %f1, fp128 %f2x,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check comparison with zero.
+define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK: ld %f0, 0(%r4)
+; CHECK: ld %f2, 8(%r4)
+; CHECK: ltxbr %f0, %f0
+; CHECK-NEXT: ber %r14
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+ %f = load fp128, fp128 *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
+ fp128 %f, fp128 0xL00000000000000000000000000000000,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
new file mode 100644
index 00000000000..b62532ed6f7
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-04.ll
@@ -0,0 +1,524 @@
+; Test that floating-point strict compares are omitted if CC already has the
+; right value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: -enable-misched=0 -no-integrated-as | FileCheck %s
+;
+; We need -enable-misched=0 to make sure f12 and following routines really
+; test the compare elimination pass.
+
+
+declare float @llvm.fabs.f32(float %f)
+
+; Test addition followed by EQ, which can use the CC result of the addition.
+define float @f1(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f1:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: ber %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; ...and again with LT.
+define float @f2(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f2:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; ...and again with GT.
+define float @f3(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f3:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: bhr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; ...and again with UEQ.
+define float @f4(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: bnlhr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Subtraction also provides a zero-based CC value.
+define float @f5(float %a, float %b, float *%dest) {
+; CHECK-LABEL: f5:
+; CHECK: seb %f0, 0(%r2)
+; CHECK-NEXT: bnher %r14
+; CHECK: br %r14
+entry:
+ %cur = load float, float *%dest
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %a, float %cur,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test the result of LOAD POSITIVE. We cannot omit the LTEBR.
+define float @f6(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f6:
+; CHECK: lpdfr %f0, %f2
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: bhr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.fabs.f32(float %a)
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %res, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test the result of LOAD NEGATIVE. We cannot omit the LTEBR.
+define float @f7(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f7:
+; CHECK: lndfr %f0, %f2
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %abs = call float @llvm.fabs.f32(float %a)
+ %res = fneg float %abs
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %res, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test the result of LOAD COMPLEMENT. We cannot omit the LTEBR.
+define float @f8(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f8:
+; CHECK: lcdfr %f0, %f2
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: bler %r14
+; CHECK: br %r14
+entry:
+ %res = fneg float %a
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %res, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Multiplication (for example) does not modify CC.
+define float @f9(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f9:
+; CHECK: meebr %f0, %f2
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: blhr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test a combination involving a CC-setting instruction followed by
+; a non-CC-setting instruction.
+define float @f10(float %a, float %b, float %c, float *%dest) #0 {
+; CHECK-LABEL: f10:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: debr %f0, %f4
+; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: bner %r14
+; CHECK: br %r14
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %add, float %c,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test a case where CC is set based on a different register from the
+; compare input.
+define float @f11(float %a, float %b, float %c, float *%dest1, float *%dest2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: sebr %f4, %f0
+; CHECK-DAG: ste %f4, 0(%r2)
+; CHECK-DAG: ltebr %f0, %f0
+; CHECK-NEXT: ber %r14
+; CHECK: br %r14
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %sub = call float @llvm.experimental.constrained.fsub.f32(
+ float %c, float %add,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ store float %sub, float *%dest1
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %add, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %sub, float *%dest2
+ br label %exit
+
+exit:
+ ret float %add
+}
+
+; Test that LER gets converted to LTEBR where useful.
+define float @f12(float %dummy, float %val) #0 {
+; CHECK-LABEL: f12:
+; CHECK: ltebr %f0, %f2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %ret = call float asm "blah $1", "=f,{f0}"(float %val)
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %val, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret float %ret
+}
+
+; Test that LDR gets converted to LTDBR where useful.
+define double @f13(double %dummy, double %val) #0 {
+; CHECK-LABEL: f13:
+; CHECK: ltdbr %f0, %f2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %ret = call double asm "blah $1", "=f,{f0}"(double %val)
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %val, double 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret double %ret
+}
+
+; Test that LXR gets converted to LTXBR where useful.
+define void @f14(fp128 *%ptr1, fp128 *%ptr2) #0 {
+; CHECK-LABEL: f14:
+; CHECK: ltxbr
+; CHECK-NEXT: dxbr
+; CHECK-NEXT: std
+; CHECK-NEXT: std
+; CHECK-NEXT: mxbr
+; CHECK-NEXT: std
+; CHECK-NEXT: std
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %val1 = load fp128, fp128 *%ptr1
+ %val2 = load fp128, fp128 *%ptr2
+ %div = fdiv fp128 %val1, %val2
+ store fp128 %div, fp128 *%ptr1
+ %mul = fmul fp128 %val1, %val2
+ store fp128 %mul, fp128 *%ptr2
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f128(
+ fp128 %val1, fp128 0xL00000000000000000000000000000000,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Test a case where it is the source rather than destination of LER that
+; we need.
+define float @f15(float %val, float %dummy) #0 {
+; CHECK-LABEL: f15:
+; CHECK: ltebr %f2, %f0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %ret = call float asm "blah $1", "=f,{f2}"(float %val)
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %val, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret float %ret
+}
+
+; Test a case where it is the source rather than destination of LDR that
+; we need.
+define double @f16(double %val, double %dummy) #0 {
+; CHECK-LABEL: f16:
+; CHECK: ltdbr %f2, %f0
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f2
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %ret = call double asm "blah $1", "=f,{f2}"(double %val)
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %val, double 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret double %ret
+}
+
+; Repeat f2 with a comparison against -0.
+define float @f17(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f17:
+; CHECK: aebr %f0, %f2
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float -0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Verify that we cannot omit the compare if there may be an intervening
+; change to the exception flags.
+define float @f18(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f18:
+; CHECK: aebr %f0, %f2
+; CHECK: ltebr %f0, %f0
+; CHECK-NEXT: ber %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ call void asm sideeffect "blah", ""()
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %res, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Verify that we cannot convert LER to LTEBR and omit the compare if
+; there may be an intervening change to the exception flags.
+define float @f19(float %dummy, float %val) #0 {
+; CHECK-LABEL: f19:
+; CHECK: ler %f0, %f2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ltebr %f2, %f2
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %ret = call float asm sideeffect "blah $1", "=f,{f0}"(float %val)
+ %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %val, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret float %ret
+}
+
+attributes #0 = { strictfp }
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata)
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll
new file mode 100644
index 00000000000..590705a5504
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-05.ll
@@ -0,0 +1,103 @@
+; Test that floating-point instructions that set cc are *not* used to
+; eliminate *strict* compares for load complement, load negative and load
+; positive.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Load complement (sign-bit flipped).
+; Test f32
+define float @f1(float %a, float %b, float %f) #0 {
+; CHECK-LABEL: f1:
+; CHECK: ltebr
+; CHECK-NEXT: ber %r14
+ %neg = fneg float %f
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %neg, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+define double @f2(double %a, double %b, double %f) #0 {
+; CHECK-LABEL: f2:
+; CHECK: ltdbr
+; CHECK-NEXT: ber %r14
+ %neg = fneg double %f
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %neg, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+; Negation of floating-point absolute.
+; Test f32
+declare float @llvm.fabs.f32(float %f)
+define float @f3(float %a, float %b, float %f) #0 {
+; CHECK-LABEL: f3:
+; CHECK: ltebr
+; CHECK-NEXT: ber %r14
+ %abs = call float @llvm.fabs.f32(float %f)
+ %neg = fneg float %abs
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %neg, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+declare double @llvm.fabs.f64(double %f)
+define double @f4(double %a, double %b, double %f) #0 {
+; CHECK-LABEL: f4:
+; CHECK: ltdbr
+; CHECK-NEXT: ber %r14
+ %abs = call double @llvm.fabs.f64(double %f)
+ %neg = fneg double %abs
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %neg, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+; Absolute floating-point value.
+; Test f32
+define float @f5(float %a, float %b, float %f) #0 {
+; CHECK-LABEL: f5:
+; CHECK: ltebr
+; CHECK-NEXT: ber %r14
+ %abs = call float @llvm.fabs.f32(float %f)
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %abs, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+define double @f6(double %a, double %b, double %f) #0 {
+; CHECK-LABEL: f6:
+; CHECK: ltdbr
+; CHECK-NEXT: ber %r14
+ %abs = call double @llvm.fabs.f64(double %f)
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %abs, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll
new file mode 100644
index 00000000000..5192ec6d134
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmp-06.ll
@@ -0,0 +1,44 @@
+; Test f128 strict comparisons on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr1, fp128 *%ptr2) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r5)
+; CHECK: wfcxb [[REG1]], [[REG2]]
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
+ fp128 %f1, fp128 %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check comparison with zero -- it is not worthwhile to copy to
+; FP pairs just so we can use LTXBR, so simply load up a zero.
+define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4)
+; CHECK-DAG: vzero [[REG2:%v[0-9]+]]
+; CHECK: wfcxb [[REG1]], [[REG2]]
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f = load fp128, fp128 *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f128(
+ fp128 %f, fp128 0xL00000000000000000000000000000000,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmp.f128(fp128, fp128, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll
new file mode 100644
index 00000000000..fa51e53a4b9
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-01.ll
@@ -0,0 +1,436 @@
+; Test 32-bit floating-point signaling comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @foo()
+
+; Check comparison with registers.
+define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: kebr %f0, %f2
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the low end of the KEB range.
+define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the high end of the aligned KEB range.
+define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) #0 {
+; CHECK-LABEL: f3:
+; CHECK: keb %f0, 4092(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r4, 4096
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) #0 {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r4, -4
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that KEB allows indices.
+define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) #0 {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r5, 2
+; CHECK: keb %f0, 400(%r1,%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that comparisons of spilled values can use KEB rather than KEBR.
+define float @f7(float *%ptr0) #0 {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: keb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %ret = call float @foo() #0
+
+ %cmp0 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp1 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val1,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp3 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val3,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp4 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val4,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp5 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val5,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp6 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val6,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp7 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val7,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp8 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val8,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp9 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val9,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp10 = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %ret, float %val10,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+
+ %sel0 = select i1 %cmp0, float %ret, float 0.0
+ %sel1 = select i1 %cmp1, float %sel0, float 1.0
+ %sel2 = select i1 %cmp2, float %sel1, float 2.0
+ %sel3 = select i1 %cmp3, float %sel2, float 3.0
+ %sel4 = select i1 %cmp4, float %sel3, float 4.0
+ %sel5 = select i1 %cmp5, float %sel4, float 5.0
+ %sel6 = select i1 %cmp6, float %sel5, float 6.0
+ %sel7 = select i1 %cmp7, float %sel6, float 7.0
+ %sel8 = select i1 %cmp8, float %sel7, float 8.0
+ %sel9 = select i1 %cmp9, float %sel8, float 9.0
+ %sel10 = select i1 %cmp10, float %sel9, float 10.0
+
+ ret float %sel10
+}
+
+; Check comparison with zero - cannot use LOAD AND TEST.
+define i64 @f8(i64 %a, i64 %b, float %f) #0 {
+; CHECK-LABEL: f8:
+; CHECK: lzer [[REG:%f[0-9]+]]
+; CHECK-NEXT: kebr %f0, [[REG]]
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the comparison can be reversed if that allows KEB to be used,
+; first with oeq.
+define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f9:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then one.
+define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f10:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnlh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then olt.
+define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f11:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ole.
+define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f12:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bher %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnhe %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then oge.
+define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f13:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bler %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnle %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ogt.
+define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f14:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ueq.
+define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f15:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnlhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrlh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then une.
+define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f16:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bner %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgre %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ult.
+define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f17:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnler %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrle %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ule.
+define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f18:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnlr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrl %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then uge.
+define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f19:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnhr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrh %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; ...then ugt.
+define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) #0 {
+; CHECK-LABEL: f20:
+; CHECK: keb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: bnher %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrhe %r2, %r3
+; CHECK: br %r14
+ %f1 = load float, float *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll
new file mode 100644
index 00000000000..4d4b66b4fcd
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-02.ll
@@ -0,0 +1,249 @@
+; Test 64-bit floating-point signaling comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @foo()
+
+; Check comparison with registers.
+define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: kdbr %f0, %f2
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the low end of the KDB range.
+define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK: kdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the high end of the aligned KDB range.
+define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) #0 {
+; CHECK-LABEL: f3:
+; CHECK: kdb %f0, 4088(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) #0 {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r4, 4096
+; CHECK: kdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) #0 {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r4, -8
+; CHECK: kdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that KDB allows indices.
+define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) #0 {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r5, 3
+; CHECK: kdb %f0, 800(%r1,%r4)
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that comparisons of spilled values can use KDB rather than KDBR.
+define double @f7(double *%ptr0) #0 {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: kdb {{%f[0-9]+}}, 160(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %ret = call double @foo() #0
+
+ %cmp0 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp1 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val1,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp2 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp3 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val3,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp4 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val4,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp5 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val5,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp6 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val6,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp7 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val7,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp8 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val8,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp9 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val9,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %cmp10 = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %ret, double %val10,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+
+ %sel0 = select i1 %cmp0, double %ret, double 0.0
+ %sel1 = select i1 %cmp1, double %sel0, double 1.0
+ %sel2 = select i1 %cmp2, double %sel1, double 2.0
+ %sel3 = select i1 %cmp3, double %sel2, double 3.0
+ %sel4 = select i1 %cmp4, double %sel3, double 4.0
+ %sel5 = select i1 %cmp5, double %sel4, double 5.0
+ %sel6 = select i1 %cmp6, double %sel5, double 6.0
+ %sel7 = select i1 %cmp7, double %sel6, double 7.0
+ %sel8 = select i1 %cmp8, double %sel7, double 8.0
+ %sel9 = select i1 %cmp9, double %sel8, double 9.0
+ %sel10 = select i1 %cmp10, double %sel9, double 10.0
+
+ ret double %sel10
+}
+
+; Check comparison with zero - cannot use LOAD AND TEST.
+define i64 @f8(i64 %a, i64 %b, double %f) #0 {
+; CHECK-LABEL: f8:
+; CHECK: lzdr [[REG:%f[0-9]+]]
+; CHECK-NEXT: kdbr %f0, [[REG]]
+; CHECK-SCALAR-NEXT: ber %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that the comparison can be reversed if that allows KDB to be used.
+define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) #0 {
+; CHECK-LABEL: f9:
+; CHECK: kdb %f0, 0(%r4)
+; CHECK-SCALAR-NEXT: blr %r14
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
+; CHECK: br %r14
+ %f1 = load double, double *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll
new file mode 100644
index 00000000000..4c571b619bf
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-03.ll
@@ -0,0 +1,48 @@
+; Test 128-bit floating-point signaling comparison. The tests assume a z10
+; implementation of select, using conditional branches rather than LOCGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r4)
+; CHECK-DAG: ld %f3, 8(%r4)
+; CHECK: kxbr %f1, %f0
+; CHECK-NEXT: ber %r14
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+ %f2x = fpext float %f2 to fp128
+ %f1 = load fp128, fp128 *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f128(
+ fp128 %f1, fp128 %f2x,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check comparison with zero - cannot use LOAD AND TEST.
+define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: ld %f0, 0(%r4)
+; CHECK-DAG: ld %f2, 8(%r4)
+; CHECK-DAG: lzxr [[REG:%f[0-9]+]]
+; CHECK-NEXT: kxbr %f0, [[REG]]
+; CHECK-NEXT: ber %r14
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+ %f = load fp128, fp128 *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f128(
+ fp128 %f, fp128 0xL00000000000000000000000000000000,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll
new file mode 100644
index 00000000000..47c0d5caa37
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-04.ll
@@ -0,0 +1,148 @@
+; Verify that floating-point strict signaling compares cannot be omitted
+; even if CC already has the right value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: -enable-misched=0 -no-integrated-as | FileCheck %s
+;
+; We need -enable-misched=0 to make sure f12 and following routines really
+; test the compare elimination pass.
+
+
+declare float @llvm.fabs.f32(float %f)
+
+; Test addition followed by EQ, which could use the CC result of the addition.
+define float @f1(float %a, float %b, float *%dest) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: aebr %f0, %f2
+; CHECK-DAG: lzer [[REG:%f[0-9]+]]
+; CHECK-NEXT: kebr %f0, [[REG]]
+; CHECK-NEXT: ber %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %a, float %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %res, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %b, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test the result of LOAD POSITIVE.
+define float @f6(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f6:
+; CHECK-DAG: lpdfr %f0, %f2
+; CHECK-DAG: lzer [[REG:%f[0-9]+]]
+; CHECK-NEXT: kebr %f0, [[REG]]
+; CHECK-NEXT: bhr %r14
+; CHECK: br %r14
+entry:
+ %res = call float @llvm.fabs.f32(float %a)
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %res, float 0.0,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %res, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test the result of LOAD NEGATIVE.
+define float @f7(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f7:
+; CHECK-DAG: lndfr %f0, %f2
+; CHECK-DAG: lzer [[REG:%f[0-9]+]]
+; CHECK-NEXT: kebr %f0, [[REG]]
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %abs = call float @llvm.fabs.f32(float %a)
+ %res = fneg float %abs
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %res, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %res, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test the result of LOAD COMPLEMENT.
+define float @f8(float %dummy, float %a, float *%dest) #0 {
+; CHECK-LABEL: f8:
+; CHECK-DAG: lcdfr %f0, %f2
+; CHECK-DAG: lzer [[REG:%f[0-9]+]]
+; CHECK-NEXT: kebr %f0, [[REG]]
+; CHECK-NEXT: bler %r14
+; CHECK: br %r14
+entry:
+ %res = fneg float %a
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %res, float 0.0,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ store float %res, float *%dest
+ br label %exit
+
+exit:
+ ret float %res
+}
+
+; Test that LER does not get converted to LTEBR.
+define float @f12(float %dummy, float %val) #0 {
+; CHECK-LABEL: f12:
+; CHECK: ler %f0, %f2
+; CHECK-NEXT: #APP
+; CHECK-NEXT: blah %f0
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lzer [[REG:%f[0-9]+]]
+; CHECK-NEXT: kebr %f2, [[REG]]
+; CHECK-NEXT: blr %r14
+; CHECK: br %r14
+entry:
+ %ret = call float asm "blah $1", "=f,{f0}"(float %val)
+ %cmp = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %val, float 0.0,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ br i1 %cmp, label %exit, label %store
+
+store:
+ call void asm sideeffect "blah", ""()
+ br label %exit
+
+exit:
+ ret float %ret
+}
+
+attributes #0 = { strictfp }
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata)
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll
new file mode 100644
index 00000000000..ac677b29619
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-05.ll
@@ -0,0 +1,103 @@
+; Test that floating-point instructions that set CC are *not* used to
+; eliminate *strict* signaling compares for load complement, load negative,
+; and load positive.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Load complement (sign-bit flipped).
+; Test f32
+define float @f1(float %a, float %b, float %f) #0 {
+; CHECK-LABEL: f1:
+; CHECK: kebr
+; CHECK-NEXT: ber %r14
+ %neg = fneg float %f
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %neg, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+define double @f2(double %a, double %b, double %f) #0 {
+; CHECK-LABEL: f2:
+; CHECK: kdbr
+; CHECK-NEXT: ber %r14
+ %neg = fneg double %f
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %neg, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+; Negation of floating-point absolute.
+; Test f32
+declare float @llvm.fabs.f32(float %f)
+define float @f3(float %a, float %b, float %f) #0 {
+; CHECK-LABEL: f3:
+; CHECK: kebr
+; CHECK-NEXT: ber %r14
+ %abs = call float @llvm.fabs.f32(float %f)
+ %neg = fneg float %abs
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %neg, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+declare double @llvm.fabs.f64(double %f)
+define double @f4(double %a, double %b, double %f) #0 {
+; CHECK-LABEL: f4:
+; CHECK: kdbr
+; CHECK-NEXT: ber %r14
+ %abs = call double @llvm.fabs.f64(double %f)
+ %neg = fneg double %abs
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %neg, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+; Absolute floating-point value.
+; Test f32
+define float @f5(float %a, float %b, float %f) #0 {
+; CHECK-LABEL: f5:
+; CHECK: kebr
+; CHECK-NEXT: ber %r14
+ %abs = call float @llvm.fabs.f32(float %f)
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %abs, float 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+define double @f6(double %a, double %b, double %f) #0 {
+; CHECK-LABEL: f6:
+; CHECK: kdbr
+; CHECK-NEXT: ber %r14
+ %abs = call double @llvm.fabs.f64(double %f)
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %abs, double 0.0,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll
new file mode 100644
index 00000000000..37c18d83680
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-cmps-06.ll
@@ -0,0 +1,44 @@
+; Test f128 signaling comparisons on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr1, fp128 *%ptr2) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r5)
+; CHECK: wfkxb [[REG1]], [[REG2]]
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f128(
+ fp128 %f1, fp128 %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check comparison with zero -- it is not worthwhile to copy to
+; FP pairs just so we can use LTXBR, so simply load up a zero.
+define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r4)
+; CHECK-DAG: vzero [[REG2:%v[0-9]+]]
+; CHECK: wfkxb [[REG1]], [[REG2]]
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f = load fp128, fp128 *%ptr
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f128(
+ fp128 %f, fp128 0xL00000000000000000000000000000000,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll b/llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll
new file mode 100644
index 00000000000..dc8d0090db2
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-cmp-01.ll
@@ -0,0 +1,560 @@
+; Test strict v4f32 comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test oeq.
+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test one.
+define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vo %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ogt.
+define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f3:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oge.
+define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f4:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ole.
+define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test olt.
+define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f6:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ueq.
+define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vno %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test une.
+define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f8:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ugt.
+define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f9:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uge.
+define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f10:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ule.
+define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f11:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ult.
+define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f12:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ord.
+define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vo %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uno.
+define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vno %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oeq selects.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f15:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test one selects.
+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f16:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ogt selects.
+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f17:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test oge selects.
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f18:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ole selects.
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f19:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test olt selects.
+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f20:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ueq selects.
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f21:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test une selects.
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f22:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ugt selects.
+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f23:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uge selects.
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f24:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ule selects.
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f25:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ult selects.
+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f26:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ord selects.
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f27:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uno selects.
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f28:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+attributes #0 = { strictfp }
+
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata)
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll
new file mode 100644
index 00000000000..2e8df56fda1
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-cmp-02.ll
@@ -0,0 +1,442 @@
+; Test f64 and v2f64 strict comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test oeq.
+define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: vfcedb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test one.
+define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ogt.
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f3:
+; CHECK: vfchdb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test oge.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f4:
+; CHECK: vfchedb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ole.
+define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f5:
+; CHECK: vfchedb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test olt.
+define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f6:
+; CHECK: vfchdb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ueq.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test une.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ugt.
+define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f9:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uge.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f10:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ule.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ult.
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f12:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ord.
+define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uno.
+define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test oeq selects.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f15:
+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test one selects.
+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f16:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ogt selects.
+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f17:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test oge selects.
+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f18:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ole selects.
+define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f19:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test olt selects.
+define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f20:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ueq selects.
+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f21:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test une selects.
+define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f22:
+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ugt selects.
+define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f23:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test uge selects.
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f24:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ule selects.
+define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f25:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ult selects.
+define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f26:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ord selects.
+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f27:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test uno selects.
+define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f28:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 {
+; CHECK-LABEL: f29:
+; CHECK: wfcdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <2 x double> %vec, i32 0
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll b/llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll
new file mode 100644
index 00000000000..a40720977ed
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-cmp-03.ll
@@ -0,0 +1,442 @@
+; Test strict f32 and v4f32 comparisons on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test oeq.
+define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: vfcesb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test one.
+define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ogt.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f3:
+; CHECK: vfchsb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oge.
+define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f4:
+; CHECK: vfchesb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ole.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f5:
+; CHECK: vfchesb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test olt.
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f6:
+; CHECK: vfchsb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ueq.
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test une.
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: vfcesb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ugt.
+define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f9:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uge.
+define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f10:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ule.
+define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ult.
+define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f12:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ord.
+define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uno.
+define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oeq selects.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f15:
+; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test one selects.
+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f16:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ogt selects.
+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f17:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test oge selects.
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f18:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ole selects.
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f19:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test olt selects.
+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f20:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ueq selects.
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f21:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchsb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test une selects.
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f22:
+; CHECK: vfcesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ugt selects.
+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f23:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uge selects.
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f24:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ule selects.
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f25:
+; CHECK: vfchsb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ult selects.
+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f26:
+; CHECK: vfchesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ord selects.
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f27:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uno selects.
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f28:
+; CHECK-DAG: vfchsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchesb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test an f32 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) #0 {
+; CHECK-LABEL: f29:
+; CHECK: wfcsb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <4 x float> %vec, i32 0
+ %cond = call i1 @llvm.experimental.constrained.fcmp.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll b/llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll
new file mode 100644
index 00000000000..e55f01c3f1d
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-cmps-01.ll
@@ -0,0 +1,442 @@
+; Test signaling f32 and v4f32 comparisons on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test oeq.
+define <4 x i32> @f1(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: vfkesb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test one.
+define <4 x i32> @f2(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ogt.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f3:
+; CHECK: vfkhsb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oge.
+define <4 x i32> @f4(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f4:
+; CHECK: vfkhesb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ole.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f5:
+; CHECK: vfkhesb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test olt.
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f6:
+; CHECK: vfkhsb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ueq.
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test une.
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: vfkesb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ugt.
+define <4 x i32> @f9(<4 x i32> %dummy, <4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f9:
+; CHECK: vfkhesb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uge.
+define <4 x i32> @f10(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f10:
+; CHECK: vfkhsb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ule.
+define <4 x i32> @f11(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: vfkhsb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ult.
+define <4 x i32> @f12(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f12:
+; CHECK: vfkhesb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ord.
+define <4 x i32> @f13(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uno.
+define <4 x i32> @f14(<4 x i32> %dummy, <4 x float> %val1,
+ <4 x float> %val2) #0 {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oeq selects.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f15:
+; CHECK: vfkesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test one selects.
+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f16:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ogt selects.
+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f17:
+; CHECK: vfkhsb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test oge selects.
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f18:
+; CHECK: vfkhesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ole selects.
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f19:
+; CHECK: vfkhesb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test olt selects.
+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f20:
+; CHECK: vfkhsb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ueq selects.
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f21:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhsb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test une selects.
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f22:
+; CHECK: vfkesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ugt selects.
+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f23:
+; CHECK: vfkhesb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uge selects.
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f24:
+; CHECK: vfkhsb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ule selects.
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f25:
+; CHECK: vfkhsb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ult selects.
+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f26:
+; CHECK: vfkhesb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ord selects.
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f27:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uno selects.
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) #0 {
+; CHECK-LABEL: f28:
+; CHECK-DAG: vfkhsb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhesb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test an f32 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, float %f1, <4 x float> %vec) #0 {
+; CHECK-LABEL: f29:
+; CHECK: wfksb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <4 x float> %vec, i32 0
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f32(
+ float %f1, float %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll b/llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll
new file mode 100644
index 00000000000..0a09fdf2543
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-cmps-02.ll
@@ -0,0 +1,442 @@
+; Test signaling f64 and v2f64 comparisons on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test oeq.
+define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: vfkedb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test one.
+define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ogt.
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f3:
+; CHECK: vfkhdb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test oge.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f4:
+; CHECK: vfkhedb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ole.
+define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f5:
+; CHECK: vfkhedb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test olt.
+define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f6:
+; CHECK: vfkhdb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ueq.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test une.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f8:
+; CHECK: vfkedb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ugt.
+define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f9:
+; CHECK: vfkhedb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uge.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f10:
+; CHECK: vfkhdb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ule.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f11:
+; CHECK: vfkhdb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ult.
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f12:
+; CHECK: vfkhedb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ord.
+define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uno.
+define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) #0 {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test oeq selects.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f15:
+; CHECK: vfkedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test one selects.
+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f16:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"one",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ogt selects.
+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f17:
+; CHECK: vfkhdb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ogt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test oge selects.
+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f18:
+; CHECK: vfkhedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ole selects.
+define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f19:
+; CHECK: vfkhedb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ole",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test olt selects.
+define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f20:
+; CHECK: vfkhdb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"olt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ueq selects.
+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f21:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhdb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ueq",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test une selects.
+define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f22:
+; CHECK: vfkedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"une",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ugt selects.
+define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f23:
+; CHECK: vfkhedb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ugt",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test uge selects.
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f24:
+; CHECK: vfkhdb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uge",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ule selects.
+define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f25:
+; CHECK: vfkhdb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ule",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ult selects.
+define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f26:
+; CHECK: vfkhedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ult",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ord selects.
+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f27:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"ord",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test uno selects.
+define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) #0 {
+; CHECK-LABEL: f28:
+; CHECK-DAG: vfkhdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfkhedb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"uno",
+ metadata !"fpexcept.strict") #0
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 {
+; CHECK-LABEL: f29:
+; CHECK: wfkdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <2 x double> %vec, i32 0
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll b/llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll
new file mode 100644
index 00000000000..ca4d0768bf7
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-strict-cmps-03.ll
@@ -0,0 +1,56 @@
+; Test signaling vector floating-point comparisons on z13.
+; Note that these must be scalarized as we do not have native instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v4f32.
+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) #0 {
+; CHECK-LABEL: f1:
+; CHECK: kebr
+; CHECK: kebr
+; CHECK: kebr
+; CHECK: kebr
+; CHECK: br %r14
+ %cmp = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test v2f64.
+define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) #0 {
+; CHECK-LABEL: f2:
+; CHECK: {{kdbr|wfkdb}}
+; CHECK: {{kdbr|wfkdb}}
+; CHECK: br %r14
+ %cmp = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f3(i64 %a, i64 %b, double %f1, <2 x double> %vec) #0 {
+; CHECK-LABEL: f3:
+; CHECK: wfkdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <2 x double> %vec, i32 0
+ %cond = call i1 @llvm.experimental.constrained.fcmps.f64(
+ double %f1, double %f2,
+ metadata !"oeq",
+ metadata !"fpexcept.strict") #0
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+attributes #0 = { strictfp }
+
+declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
+
OpenPOWER on IntegriCloud