-rw-r--r--   llvm/docs/LangRef.rst                                      95
-rw-r--r--   llvm/include/llvm/CodeGen/ISDOpcodes.h                      5
-rw-r--r--   llvm/include/llvm/CodeGen/TargetLowering.h                  1
-rw-r--r--   llvm/include/llvm/IR/Intrinsics.td                          6
-rw-r--r--   llvm/include/llvm/Target/TargetSelectionDAG.td              1
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp               2
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp    142
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp         1
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp       2
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp       8
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp        2
-rw-r--r--   llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp           56
-rw-r--r--   llvm/lib/CodeGen/TargetLoweringBase.cpp                     1
-rw-r--r--   llvm/lib/IR/Verifier.cpp                                   13
-rw-r--r--   llvm/test/CodeGen/X86/smul_fix_sat.ll                     739
-rw-r--r--   llvm/test/CodeGen/X86/smul_fix_sat_constants.ll           101
16 files changed, 1150 insertions, 25 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 3f35752b450..503b8989207 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -13278,6 +13278,31 @@ are useful for representing fractional values to a specific precision. The
following intrinsics perform fixed point arithmetic operations on 2 operands
of the same scale, specified as the third argument.
+The ``llvm.*mul.fix`` family of intrinsic functions represents a multiplication
+of fixed point numbers through scaled integers. Therefore, fixed point
+multiplication can be represented as
+
+::
+
+      %result = call i4 @llvm.smul.fix.i4(i4 %a, i4 %b, i32 %scale)
+      =>
+      %a2 = sext i4 %a to i8
+      %b2 = sext i4 %b to i8
+      %mul = mul nsw i8 %a2, %b2
+      %scale2 = trunc i32 %scale to i8
+      %r = ashr i8 %mul, %scale2 ; this is for a target rounding down towards negative infinity
+      %result = trunc i8 %r to i4
+
+For each of these functions, if the result cannot be represented exactly with
+the provided scale, the result is rounded. The rounding direction is left
+unspecified because the preferred rounding may vary between targets; instead,
+a target may specify its preferred rounding through a target hook. Pipelines
+should legalize or optimize these operations using the rounding specified by
+this hook, if it is provided. Operations like constant folding, instruction
+combining, KnownBits, and ValueTracking should also use this hook, if
+provided, and must not assume a direction of rounding. A rounded result must
+always be within one unit of precision of the true result; that is, the error
+between the returned result and the true result must be less than 1/2^(scale).
+
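+For example, at a scale of 2 one unit of precision is 1/4, so for a product
+that falls between two representable values either neighbouring value is an
+acceptable result:
+
+.. code-block:: llvm
+
+      ; 1.25 x 0.75 = 0.9375 is not exactly representable at scale 2, so
+      ; either neighbour may be returned; both are within 1/4 of the true result.
+      %res = call i4 @llvm.smul.fix.i4(i4 5, i4 3, i32 2) ; %res = 3 (0.75) or 4 (1.0)
+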
'``llvm.smul.fix.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -13398,6 +13423,76 @@ Examples
%res = call i4 @llvm.umul.fix.i4(i4 15, i4 1, i32 1) ; %res = 7 (or 8) (7.5 x 0.5 = 3.75)
+'``llvm.smul.fix.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.smul.fix.sat``
+on any integer bit width or vectors of integers.
+
+::
+
+ declare i16 @llvm.smul.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+ declare i32 @llvm.smul.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+ declare i64 @llvm.smul.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+ declare <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+Overview
+"""""""""
+
+The '``llvm.smul.fix.sat``' family of intrinsic functions perform signed,
+saturating fixed point multiplication on 2 arguments of the same scale.
+
+Arguments
+""""""""""
+
+The arguments (%a and %b) and the result may be of integer types of any bit
+width, but they must have the same bit width. ``%a`` and ``%b`` are the two
+values that will undergo signed fixed point multiplication. The argument
+``%scale`` represents the scale of both operands, and must be a constant
+integer.
+
+Semantics:
+""""""""""
+
+This operation performs fixed point multiplication on the 2 arguments at the
+scale specified by the third argument. The result is returned in that same
+scale.
+
+If the result value cannot be precisely represented in the given scale, the
+value is rounded up or down to the closest representable value. The rounding
+direction is unspecified.
+
+The maximum value this operation can clamp to is the largest signed value
+representable by the bit width of the first 2 arguments. The minimum value is the
+smallest signed value representable by this bit width.
+
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 2, i32 0) ; %res = 6 (2 x 3 = 6)
+ %res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 2, i32 1) ; %res = 3 (1.5 x 1 = 1.5)
+ %res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 -2, i32 1) ; %res = -3 (1.5 x -1 = -1.5)
+
+ ; The result in the following could be rounded up to -2 or down to -2.5
+ %res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 -3, i32 1) ; %res = -5 (or -4) (1.5 x -1.5 = -2.25)
+
+ ; Saturation
+  %res = call i4 @llvm.smul.fix.sat.i4(i4 7, i4 2, i32 0) ; %res = 7 (7 x 2 -> clamped to 7)
+  %res = call i4 @llvm.smul.fix.sat.i4(i4 7, i4 7, i32 2) ; %res = 7 (1.75 x 1.75 -> clamped to 1.75)
+  %res = call i4 @llvm.smul.fix.sat.i4(i4 -8, i4 7, i32 2) ; %res = -8 (-2 x 1.75 -> clamped to -2)
+  %res = call i4 @llvm.smul.fix.sat.i4(i4 -8, i4 -8, i32 2) ; %res = 7 (-2 x -2 -> clamped to 1.75)
+
+ ; Scale can affect the saturation result
+ %res = call i4 @llvm.smul.fix.sat.i4(i4 2, i4 4, i32 0) ; %res = 7 (2 x 4 -> clamped to 7)
+ %res = call i4 @llvm.smul.fix.sat.i4(i4 2, i4 4, i32 1) ; %res = 4 (1 x 2 = 2)
+
+
Specialised Arithmetic Intrinsics
---------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 0f798b5d20c..9b765299b10 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -278,6 +278,11 @@ namespace ISD {
/// multiplication on 2 integers.
SMULFIX, UMULFIX,
+ /// Same as the corresponding unsaturated fixed point instructions, but the
+ /// result is clamped between the min and max values representable by the
+ /// bits of the first 2 operands.
+ SMULFIXSAT,
+
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index b65612cb25b..7f4b2bad803 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -855,6 +855,7 @@ public:
default:
llvm_unreachable("Unexpected fixed point operation.");
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 2957478ef5b..b329d5c3eb8 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -874,6 +874,12 @@ def int_umul_fix : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+//===------------------- Fixed Point Saturation Arithmetic Intrinsics ----------------===//
+//
+def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative, ImmArg<2>]>;
+
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 6b1ef477bfa..28a2eb0727a 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -391,6 +391,7 @@ def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>;
def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>;
def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
+def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 63d407cfd5f..52ae1e01a9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1140,6 +1140,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
break;
}
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
@@ -3334,6 +3335,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0930b63eecd..357654fb1af 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -149,6 +149,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
@@ -670,14 +671,35 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// Can just promote the operands then continue with operation.
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
- if (N->getOpcode() == ISD::SMULFIX) {
+ bool Signed =
+ N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
} else {
Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
}
+ EVT OldType = N->getOperand(0).getValueType();
EVT PromotedType = Op1Promoted.getValueType();
+ unsigned DiffSize =
+ PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
+
+ bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+ if (Saturating) {
+ // Promoting the operand and result values changes the saturation width,
+    // which extends the values that we clamp to on saturation. This could be
+ // resolved by shifting one of the operands the same amount, which would
+ // also shift the result we compare against, then shifting back.
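+    // For example (purely illustrative): promoting an i4 smul.fix.sat with
+    // scale 2 to i8 gives DiffSize == 4. With both operands equal to 7 (1.75),
+    // (7 << 4) * 7 == 784 and 784 >> 2 == 196, which saturates to 127 in i8;
+    // shifting right by 4 afterwards yields 7, the saturated i4 result.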
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ }
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
}
@@ -1125,6 +1147,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1688,7 +1711,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
+
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
@@ -2712,19 +2737,40 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SplitInteger(Result, Lo, Hi);
}
+/// This performs an expansion of the integer result for a fixed point
+/// multiplication. The default expansion rounds down towards negative
+/// infinity; targets that care about the rounding direction should specify a
+/// target hook for rounding and provide their own expansion or lowering of
+/// fixed point multiplication that is consistent with it.
void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- assert(
- (N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::UMULFIX) &&
- "Expected operand to be signed or unsigned fixed point multiplication");
-
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ unsigned VTSize = VT.getScalarSizeInBits();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
+ bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+ EVT BoolVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
if (!Scale) {
- SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ SDValue Result;
+ if (!Saturating) {
+ Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else {
+ Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ }
SplitInteger(Result, Lo, Hi);
return;
}
@@ -2735,7 +2781,8 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- bool Signed = N->getOpcode() == ISD::SMULFIX;
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
@@ -2744,8 +2791,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
return;
}
- unsigned VTSize = VT.getScalarSizeInBits();
unsigned NVTSize = NVT.getScalarSizeInBits();
+ assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+ "the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// Shift whole amount by scale.
@@ -2754,6 +2802,12 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue ResultHL = Result[2];
SDValue ResultHH = Result[3];
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), NVT);
+
// After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
// Let's say we multiply 2 64 bit numbers. The resulting value can be held in
@@ -2782,11 +2836,60 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of
+ // saturation.
+  // The number of overflow bits we can check is VTSize - Scale + 1 (we
+ // include the sign bit). If these top bits are > 0, then we overflowed past
+ // the max value. If these top bits are < -1, then we overflowed past the
+ // min value. Otherwise, we did not overflow.
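+  // As an illustrative example, take VT == i8 split into NVT == i4 parts with
+  // Scale == 2: multiplying 127 (31.75) by 8 (2.0) yields the 16-bit product
+  // 1016 == 0x03F8, i.e. HH == 0, HL == 3, LH == 0xF, LL == 0x8. Here HH == 0
+  // and HL is unsigned-greater-than HLLoMask (1), so the result saturates to
+  // the maximum value.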
+ if (Saturating) {
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+
+ // HH > 0 or HH == 0 && HL > HLLoMask
+ SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLPos =
+ DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
+
+ // HH < -1 or HH == -1 && HL < HLHiMask
+ SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLNeg =
+ DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
+ }
} else if (Scale == NVTSize) {
// If the scales are equal, Lo and Hi are ResultLH and Result HL,
// respectively. Avoid shifting to prevent undefined behavior.
Lo = ResultLH;
Hi = ResultHL;
+
+ // We overflow max if HH > 0 or HH == 0 && HL sign is negative.
+ // We overflow min if HH < -1 or HH == -1 && HL sign is 0.
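+    // For example, with NVT == i4 and Scale == 4, multiplying 64 (4.0) by
+    // 64 (4.0) in i8 gives the 16-bit product 4096 == 0x1000, so HH == 1 and
+    // the result saturates to the maximum value (127, i.e. 7.9375).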
+ if (Saturating) {
+ SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
+
+ SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
+ }
} else if (Scale < VTSize) {
// If the scale is instead less than the old VT size, but greater than or
// equal to the expanded VT size, the first part of the result (ResultLL) is
@@ -2801,6 +2904,19 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+
+ // This is similar to the case when we saturate if Scale < NVTSize, but we
+  // only need to check HH.
+ if (Saturating) {
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ }
} else if (Scale == VTSize) {
assert(
!Signed &&
@@ -2812,6 +2928,16 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
llvm_unreachable("Expected the scale to be less than or equal to the width "
"of the operands");
}
+
+ if (Saturating) {
+ APInt LHMax = APInt::getSignedMaxValue(NVTSize);
+ APInt LLMax = APInt::getAllOnesValue(NVTSize);
+ APInt LHMin = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
+ }
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ad2e398dff1..f77ccd994da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -438,6 +438,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index add97ec1057..8570f57616e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -183,6 +183,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_OverflowOp(N, ResNo);
break;
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX:
R = ScalarizeVecRes_MULFIX(N);
break;
@@ -971,6 +972,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
break;
case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
case ISD::UMULFIX:
SplitVecRes_MULFIX(N, Lo, Hi);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 31410f208ac..5ac9d796f78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6298,6 +6298,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
+ case Intrinsic::smul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
+ }
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index cbef6cc24f7..28416336578 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -301,7 +301,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UADDSAT: return "uaddsat";
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
+
case ISD::SMULFIX: return "smulfix";
+ case ISD::SMULFIXSAT: return "smulfixsat";
case ISD::UMULFIX: return "umulfix";
// Conversion operators.
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f07180a2faa..ac45f4e08a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5695,25 +5695,42 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
- Node->getOpcode() == ISD::UMULFIX) &&
- "Expected opcode to be SMULFIX or UMULFIX.");
+ Node->getOpcode() == ISD::UMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT) &&
+ "Expected a fixed point multiplication opcode");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);
+ bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ unsigned VTSize = VT.getScalarSizeInBits();
- // [us]mul.fix(a, b, 0) -> mul(a, b)
if (!Scale) {
- if (VT.isVector() && !isOperationLegalOrCustom(ISD::MUL, VT))
- return SDValue();
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ // [us]mul.fix(a, b, 0) -> mul(a, b)
+ if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ }
}
- unsigned VTSize = VT.getScalarSizeInBits();
- bool Signed = Node->getOpcode() == ISD::SMULFIX;
-
+ bool Signed =
+ Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
"Expected scale to be less than the number of bits if signed or at "
"most the number of bits if unsigned.");
@@ -5746,8 +5763,25 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
- return DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
- DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ if (!Saturating)
+ return Result;
+
+ unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
+ SDValue HiMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
+ SDValue LoMask = DAG.getConstant(
+ APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+
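+  // As an illustration: for VT == i8 and Scale == 2, OverflowBits == 7, so
+  // LoMask == 1 and HiMask == 0xFE. Multiplying 127 (31.75) by 8 (2.0) gives
+  // Hi:Lo == 0x03F8; Hi == 3 is greater than LoMask, so the result saturates
+  // to the signed maximum.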
+ Result = DAG.getSelectCC(dl, Hi, LoMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETGT);
+ return DAG.getSelectCC(dl, Hi, HiMask,
+ DAG.getConstant(MinVal, dl, VT), Result,
+ ISD::SETLT);
}
void TargetLowering::expandUADDSUBO(
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index eeb8aa1374f..32f97f7e2aa 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -623,6 +623,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
+ setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
// Overflow operations default to expand
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 67d43ce7740..fc8d210e67a 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4595,27 +4595,28 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
case Intrinsic::smul_fix:
+ case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
- "first operand of [us]mul_fix must be an int type or vector "
+ "first operand of [us]mul_fix[_sat] must be an int type or vector "
"of ints");
Assert(Op2->getType()->isIntOrIntVectorTy(),
- "second operand of [us]mul_fix must be an int type or vector "
+          "second operand of [us]mul_fix[_sat] must be an int type or vector "
"of ints");
auto *Op3 = cast<ConstantInt>(Call.getArgOperand(2));
Assert(Op3->getType()->getBitWidth() <= 32,
- "third argument of [us]mul_fix must fit within 32 bits");
+ "third argument of [us]mul_fix[_sat] must fit within 32 bits");
- if (ID == Intrinsic::smul_fix) {
+ if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat) {
Assert(
Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
- "the scale of smul_fix must be less than the width of the operands");
+ "the scale of smul_fix[_sat] must be less than the width of the operands");
} else {
Assert(Op3->getZExtValue() <= Op1->getType()->getScalarSizeInBits(),
- "the scale of umul_fix must be less than or equal to the width of "
+ "the scale of umul_fix[_sat] must be less than or equal to the width of "
"the operands");
}
break;
diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll
new file mode 100644
index 00000000000..44603703c51
--- /dev/null
+++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll
@@ -0,0 +1,739 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+
+declare i4 @llvm.smul.fix.sat.i4 (i4, i4, i32)
+declare i32 @llvm.smul.fix.sat.i32 (i32, i32, i32)
+declare i64 @llvm.smul.fix.sat.i64 (i64, i64, i32)
+declare <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
+
+define i32 @func(i32 %x, i32 %y) nounwind {
+; X64-LABEL: func:
+; X64: # %bb.0:
+; X64-NEXT: movslq %esi, %rax
+; X64-NEXT: movslq %edi, %rcx
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: shrq $32, %rax
+; X64-NEXT: shrdl $2, %eax, %ecx
+; X64-NEXT: cmpl $1, %eax
+; X64-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
+; X64-NEXT: cmovlel %ecx, %edx
+; X64-NEXT: cmpl $-2, %eax
+; X64-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: cmovgel %edx, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: shrdl $2, %edx, %eax
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: cmpl $-2, %edx
+; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: retl
+ %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 2);
+ ret i32 %tmp;
+}
+
+define i64 @func2(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func2:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: imulq %rsi
+; X64-NEXT: shrdq $2, %rdx, %rax
+; X64-NEXT: cmpq $1, %rdx
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: cmpq $-2, %rdx
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: imull %esi
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: adcl $0, %ecx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: subl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovnsl %ecx, %edi
+; X86-NEXT: cmovnsl %edx, %esi
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %edi, %ebp
+; X86-NEXT: sbbl $0, %ebp
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %edi, %ebp
+; X86-NEXT: cmovnsl %esi, %ecx
+; X86-NEXT: testl %ebp, %ebp
+; X86-NEXT: setg %bh
+; X86-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: seta %bl
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shldl $30, %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $30, %esi, %eax
+; X86-NEXT: andb {{[-0-9]+}}(%e{{[sb]}}p), %bl # 1-byte Folded Reload
+; X86-NEXT: orb %bh, %bl
+; X86-NEXT: testb %bl, %bl
+; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
+; X86-NEXT: cmovnel %esi, %edx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovnel %esi, %eax
+; X86-NEXT: cmpl $-1, %ebp
+; X86-NEXT: setl %bl
+; X86-NEXT: sete %bh
+; X86-NEXT: cmpl $-2, %ecx
+; X86-NEXT: setb %cl
+; X86-NEXT: andb %bh, %cl
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: orb %bl, %cl
+; X86-NEXT: cmovnel %esi, %eax
+; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 2);
+ ret i64 %tmp;
+}
+
+define i4 @func3(i4 %x, i4 %y) nounwind {
+; X64-LABEL: func3:
+; X64: # %bb.0:
+; X64-NEXT: shlb $4, %sil
+; X64-NEXT: sarb $4, %sil
+; X64-NEXT: shlb $4, %dil
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: imull %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: shrb $2, %al
+; X64-NEXT: shrl $8, %ecx
+; X64-NEXT: movl %ecx, %edx
+; X64-NEXT: shlb $6, %dl
+; X64-NEXT: orb %al, %dl
+; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: cmpb $1, %cl
+; X64-NEXT: movl $127, %edx
+; X64-NEXT: cmovlel %eax, %edx
+; X64-NEXT: cmpb $-2, %cl
+; X64-NEXT: movl $128, %eax
+; X64-NEXT: cmovgel %edx, %eax
+; X64-NEXT: sarb $4, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func3:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb $4, %al
+; X86-NEXT: sarb $4, %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: shlb $4, %cl
+; X86-NEXT: movsbl %cl, %ecx
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: movb %ah, %cl
+; X86-NEXT: shlb $6, %cl
+; X86-NEXT: shrb $2, %al
+; X86-NEXT: orb %cl, %al
+; X86-NEXT: movzbl %al, %ecx
+; X86-NEXT: cmpb $1, %ah
+; X86-NEXT: movl $127, %edx
+; X86-NEXT: cmovlel %ecx, %edx
+; X86-NEXT: cmpb $-2, %ah
+; X86-NEXT: movl $128, %eax
+; X86-NEXT: cmovgel %edx, %eax
+; X86-NEXT: sarb $4, %al
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+ %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 2);
+ ret i4 %tmp;
+}
+
+define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec:
+; X64: # %bb.0:
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; X64-NEXT: movd %xmm2, %eax
+; X64-NEXT: cltq
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: movslq %ecx, %rdx
+; X64-NEXT: imulq %rax, %rdx
+; X64-NEXT: movq %rdx, %rcx
+; X64-NEXT: shrq $32, %rcx
+; X64-NEXT: shrdl $2, %ecx, %edx
+; X64-NEXT: cmpl $1, %ecx
+; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmovgl %eax, %edx
+; X64-NEXT: cmpl $-2, %ecx
+; X64-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
+; X64-NEXT: cmovll %ecx, %edx
+; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; X64-NEXT: movd %xmm3, %edx
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; X64-NEXT: movd %xmm3, %esi
+; X64-NEXT: movslq %esi, %rsi
+; X64-NEXT: imulq %rdx, %rsi
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: shrq $32, %rdx
+; X64-NEXT: shrdl $2, %edx, %esi
+; X64-NEXT: cmpl $1, %edx
+; X64-NEXT: cmovgl %eax, %esi
+; X64-NEXT: cmpl $-2, %edx
+; X64-NEXT: cmovll %ecx, %esi
+; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT: movd %xmm1, %edx
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: movd %xmm0, %esi
+; X64-NEXT: movslq %esi, %rsi
+; X64-NEXT: imulq %rdx, %rsi
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: shrq $32, %rdx
+; X64-NEXT: shrdl $2, %edx, %esi
+; X64-NEXT: cmpl $1, %edx
+; X64-NEXT: cmovgl %eax, %esi
+; X64-NEXT: cmpl $-2, %edx
+; X64-NEXT: cmovll %ecx, %esi
+; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X64-NEXT: movd %xmm1, %edx
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT: movd %xmm0, %esi
+; X64-NEXT: movslq %esi, %rsi
+; X64-NEXT: imulq %rdx, %rsi
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: shrq $32, %rdx
+; X64-NEXT: shrdl $2, %edx, %esi
+; X64-NEXT: cmpl $1, %edx
+; X64-NEXT: cmovgl %eax, %esi
+; X64-NEXT: cmpl $-2, %edx
+; X64-NEXT: cmovll %ecx, %esi
+; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X64-NEXT: movdqa %xmm2, %xmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: vec:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrdl $2, %edx, %ecx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF
+; X86-NEXT: cmovgl %ebp, %ecx
+; X86-NEXT: cmpl $-2, %edx
+; X86-NEXT: movl $-2147483648, %esi # imm = 0x80000000
+; X86-NEXT: cmovll %esi, %ecx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: shrdl $2, %edx, %edi
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: cmovgl %ebp, %edi
+; X86-NEXT: cmpl $-2, %edx
+; X86-NEXT: cmovll %esi, %edi
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: shrdl $2, %edx, %ebx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: cmovgl %ebp, %ebx
+; X86-NEXT: cmpl $-2, %edx
+; X86-NEXT: cmovll %esi, %ebx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: shrdl $2, %edx, %eax
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: cmovgl %ebp, %eax
+; X86-NEXT: cmpl $-2, %edx
+; X86-NEXT: cmovll %esi, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %eax, 12(%edx)
+; X86-NEXT: movl %ebx, 8(%edx)
+; X86-NEXT: movl %edi, 4(%edx)
+; X86-NEXT: movl %ecx, (%edx)
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2);
+ ret <4 x i32> %tmp;
+}
+
+; These result in regular integer multiplication
+define i32 @func4(i32 %x, i32 %y) nounwind {
+; X64-LABEL: func4:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: imull %esi, %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: setns %al
+; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: imull %esi, %edi
+; X64-NEXT: cmovnol %edi, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: imull %edx, %esi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: setns %cl
+; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: imull %edx, %eax
+; X86-NEXT: cmovol %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+ %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 0);
+ ret i32 %tmp;
+}
+
+define i64 @func5(i64 %x, i64 %y) {
+; X64-LABEL: func5:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: imulq %rsi, %rax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: setns %cl
+; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: addq %rcx, %rax
+; X64-NEXT: imulq %rsi, %rdi
+; X64-NEXT: cmovnoq %rdi, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func5:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: .cfi_offset %edi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl $0, (%esp)
+; X86-NEXT: movl %esp, %edi
+; X86-NEXT: pushl %edi
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl %edx
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl %ecx
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll __mulodi4
+; X86-NEXT: addl $20, %esp
+; X86-NEXT: .cfi_adjust_cfa_offset -20
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: setns %cl
+; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: cmpl $0, (%esp)
+; X86-NEXT: cmovnel %esi, %eax
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: popl %edi
+; X86-NEXT: .cfi_def_cfa_offset 4
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0);
+ ret i64 %tmp;
+}
+
+define i4 @func6(i4 %x, i4 %y) nounwind {
+; X64-LABEL: func6:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shlb $4, %sil
+; X64-NEXT: sarb $4, %sil
+; X64-NEXT: shlb $4, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: imulb %sil
+; X64-NEXT: seto %cl
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: testb %al, %al
+; X64-NEXT: setns %dl
+; X64-NEXT: addl $127, %edx
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: testb %cl, %cl
+; X64-NEXT: cmovnel %edx, %eax
+; X64-NEXT: sarb $4, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func6:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: shlb $4, %cl
+; X86-NEXT: sarb $4, %cl
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb $4, %al
+; X86-NEXT: imulb %cl
+; X86-NEXT: seto %dl
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: testb %al, %al
+; X86-NEXT: setns %cl
+; X86-NEXT: addl $127, %ecx
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: testb %dl, %dl
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: sarb $4, %al
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+ %tmp = call i4 @llvm.smul.fix.sat.i4(i4 %x, i4 %y, i32 0);
+ ret i4 %tmp;
+}
+
+define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec2:
+; X64: # %bb.0:
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; X64-NEXT: movd %xmm2, %ecx
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; X64-NEXT: movd %xmm2, %r8d
+; X64-NEXT: movl %r8d, %edx
+; X64-NEXT: imull %ecx, %edx
+; X64-NEXT: xorl %esi, %esi
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: setns %sil
+; X64-NEXT: addl $2147483647, %esi # imm = 0x7FFFFFFF
+; X64-NEXT: imull %ecx, %r8d
+; X64-NEXT: cmovol %esi, %r8d
+; X64-NEXT: movd %xmm1, %edx
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: movl %ecx, %esi
+; X64-NEXT: imull %edx, %esi
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: testl %esi, %esi
+; X64-NEXT: setns %dil
+; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF
+; X64-NEXT: imull %edx, %ecx
+; X64-NEXT: cmovol %edi, %ecx
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; X64-NEXT: movd %xmm2, %edx
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; X64-NEXT: movd %xmm2, %esi
+; X64-NEXT: movl %esi, %edi
+; X64-NEXT: imull %edx, %edi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: setns %al
+; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: imull %edx, %esi
+; X64-NEXT: cmovol %eax, %esi
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; X64-NEXT: movd %xmm1, %r9d
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X64-NEXT: movd %xmm0, %edx
+; X64-NEXT: movl %edx, %edi
+; X64-NEXT: imull %r9d, %edi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: setns %al
+; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: imull %r9d, %edx
+; X64-NEXT: cmovol %eax, %edx
+; X64-NEXT: movd %edx, %xmm0
+; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: movd %r8d, %xmm2
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: retq
+;
+; X86-LABEL: vec2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: imull %edx, %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: setns %al
+; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: imull %edx, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmovol %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: setns %al
+; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: imull %esi, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmovol %eax, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: imull %edi, %ebx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: setns %al
+; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: imull %edi, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: cmovol %eax, %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %edi, %ebp
+; X86-NEXT: imull %eax, %ebp
+; X86-NEXT: xorl %ebx, %ebx
+; X86-NEXT: testl %ebp, %ebp
+; X86-NEXT: setns %bl
+; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
+; X86-NEXT: imull %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: cmovol %ebx, %edi
+; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %tmp = call <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0);
+ ret <4 x i32> %tmp;
+}
+
+define i64 @func7(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func7:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: imulq %rsi
+; X64-NEXT: shrdq $32, %rdx, %rax
+; X64-NEXT: cmpq $2147483647, %rdx # imm = 0x7FFFFFFF
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: cmpq $-2147483648, %rdx # imm = 0x80000000
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func7:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: adcl $0, %ebp
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ebp, %esi
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %ebp, %esi
+; X86-NEXT: cmovnsl %edx, %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %esi, %edi
+; X86-NEXT: cmovnsl %ecx, %edx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: setg %cl
+; X86-NEXT: sets %ch
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: setg %bl
+; X86-NEXT: sete %bh
+; X86-NEXT: andb %ch, %bh
+; X86-NEXT: orb %bl, %bh
+; X86-NEXT: movl $2147483647, %esi # imm = 0x7FFFFFFF
+; X86-NEXT: cmovnel %esi, %edx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovnel %esi, %eax
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: setl %ch
+; X86-NEXT: sete %bl
+; X86-NEXT: andb %cl, %bl
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: orb %ch, %bl
+; X86-NEXT: cmovnel %esi, %eax
+; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 32);
+ ret i64 %tmp;
+}
+
+define i64 @func8(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func8:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: imulq %rsi
+; X64-NEXT: shrdq $63, %rdx, %rax
+; X64-NEXT: movabsq $4611686018427387903, %rcx # imm = 0x3FFFFFFFFFFFFFFF
+; X64-NEXT: cmpq %rcx, %rdx
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: movabsq $-4611686018427387904, %rcx # imm = 0xC000000000000000
+; X64-NEXT: cmpq %rcx, %rdx
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func8:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebp
+; X86-NEXT: addl %ebx, %ebp
+; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: imull {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NEXT: addl %ebp, %eax
+; X86-NEXT: adcl %edi, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: adcl $0, %ebx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: sbbl $0, %esi
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %ebx, %esi
+; X86-NEXT: cmovnsl %edx, %ecx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sbbl $0, %ebx
+; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnsl %esi, %ebx
+; X86-NEXT: cmovnsl %ecx, %edi
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: shldl $1, %edi, %edx
+; X86-NEXT: shrdl $31, %edi, %eax
+; X86-NEXT: cmpl $1073741823, %ebx # imm = 0x3FFFFFFF
+; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovgl %ecx, %edx
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: cmpl $-1073741824, %ebx # imm = 0xC0000000
+; X86-NEXT: cmovll %ecx, %eax
+; X86-NEXT: movl $-2147483648, %ecx # imm = 0x80000000
+; X86-NEXT: cmovll %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63);
+ ret i64 %tmp;
+}
diff --git a/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll b/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll
new file mode 100644
index 00000000000..53c2074de1f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/smul_fix_sat_constants.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+
+; Verify expansion by using constant values. We just want to cover all the paths laid out by ExpandIntRes_MULFIX.
+
+declare i4 @llvm.smul.fix.sat.i4 (i4, i4, i32)
+declare i32 @llvm.smul.fix.sat.i32 (i32, i32, i32)
+declare i64 @llvm.smul.fix.sat.i64 (i64, i64, i32)
+declare <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
+declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
+
+define i64 @func() nounwind {
+; X64-LABEL: func:
+; X64: # %bb.0:
+; X64-NEXT: movl $2, %ecx
+; X64-NEXT: movl $3, %eax
+; X64-NEXT: imulq %rcx
+; X64-NEXT: shrdq $2, %rdx, %rax
+; X64-NEXT: cmpq $1, %rdx
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: cmpq $-2, %rdx
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 3, i64 2, i32 2);
+ ret i64 %tmp;
+}
+
+define i64 @func2() nounwind {
+; X64-LABEL: func2:
+; X64: # %bb.0:
+; X64-NEXT: movl $3, %eax
+; X64-NEXT: imulq $2, %rax, %rcx
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: testq %rcx, %rcx
+; X64-NEXT: setns %dl
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: addq %rdx, %rcx
+; X64-NEXT: imulq $2, %rax, %rax
+; X64-NEXT: cmovoq %rcx, %rax
+; X64-NEXT: retq
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 3, i64 2, i32 0);
+ ret i64 %tmp;
+}
+
+define i64 @func3() nounwind {
+; X64-LABEL: func3:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: movl $2, %edx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: imulq %rdx
+; X64-NEXT: shrdq $2, %rdx, %rax
+; X64-NEXT: cmpq $1, %rdx
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: cmpq $-2, %rdx
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 2);
+ ret i64 %tmp;
+}
+
+define i64 @func4() nounwind {
+; X64-LABEL: func4:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: movl $2, %edx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: imulq %rdx
+; X64-NEXT: shrdq $32, %rdx, %rax
+; X64-NEXT: cmpq $2147483647, %rdx # imm = 0x7FFFFFFF
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: cmpq $-2147483648, %rdx # imm = 0x80000000
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 32);
+ ret i64 %tmp;
+}
+
+define i64 @func5() nounwind {
+; X64-LABEL: func5:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT: movl $2, %edx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: imulq %rdx
+; X64-NEXT: shrdq $63, %rdx, %rax
+; X64-NEXT: movabsq $4611686018427387903, %rsi # imm = 0x3FFFFFFFFFFFFFFF
+; X64-NEXT: cmpq %rsi, %rdx
+; X64-NEXT: cmovgq %rcx, %rax
+; X64-NEXT: movabsq $-4611686018427387904, %rcx # imm = 0xC000000000000000
+; X64-NEXT: cmpq %rcx, %rdx
+; X64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; X64-NEXT: cmovlq %rcx, %rax
+; X64-NEXT: retq
+ %tmp = call i64 @llvm.smul.fix.sat.i64(i64 9223372036854775807, i64 2, i32 63);
+ ret i64 %tmp;
+}