summary refs log tree commit diff stats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 183
1 files changed, 75 insertions, 108 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 345ff72729f..fe4f157fa86 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -782,8 +782,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
@@ -1087,9 +1087,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
- setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
-
+ setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
@@ -1331,8 +1330,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
- setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
- setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
+ setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
@@ -22901,6 +22900,75 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256())
return Lower256IntArith(Op, DAG);
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
+ assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
+ (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
+ (VT == MVT::v16i32 && Subtarget.hasAVX512()));
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+
+ int NumElts = VT.getVectorNumElements();
+
+ // PMULxD operations multiply each even value (starting at 0) of LHS with
+ // the related value of RHS and produce a widened result.
+ // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ //
+ // In other words, to have all the results, we need to perform two PMULxD:
+ // 1. one with the even values.
+ // 2. one with the odd values.
+ // To achieve #2, we need to place the odd values at an even position.
+ //
+ // Place the odd value at an even position (basically, shift all values 1
+ // step to the left):
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
+ 9, -1, 11, -1, 13, -1, 15, -1};
+ // <a|b|c|d> => <b|undef|d|undef>
+ SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
+ makeArrayRef(&Mask[0], NumElts));
+ // <e|f|g|h> => <f|undef|h|undef>
+ SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
+ makeArrayRef(&Mask[0], NumElts));
+
+ // Emit two multiplies, one for the even-indexed elements and one for the
+ // odd-indexed elements (already moved into even positions above).
+ MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
+ bool IsSigned = Op->getOpcode() == ISD::MULHS;
+ unsigned Opcode =
+ (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+ // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
+ // => <2 x i64> <ae|cg>
+ SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, Op0),
+ DAG.getBitcast(MulVT, Op1)));
+ // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
+ // => <2 x i64> <bf|dh>
+ SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
+ DAG.getBitcast(MulVT, Odd0),
+ DAG.getBitcast(MulVT, Odd1)));
+
+ // Shuffle it back into the right order.
+ SmallVector<int, 16> ShufMask(NumElts);
+ for (int i = 0; i != NumElts; ++i)
+ ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
+
+ SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);
+
+ // If we have a signed multiply but no PMULDQ fix up the result of an
+ // unsigned multiply.
+ if (IsSigned && !Subtarget.hasSSE41()) {
+ SDValue ShAmt = DAG.getConstant(31, dl, VT);
+ SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
+ SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
+
+ SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
+ }
+
+ return Res;
+ }
+
// Only i8 vectors should need custom lowering after this.
assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
(VT == MVT::v64i8 && Subtarget.hasBWI())) &&
@@ -23084,105 +23152,6 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
return DAG.getBitcast(VT, CallInfo.first);
}
-static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- MVT VT = Op0.getSimpleValueType();
- SDLoc dl(Op);
-
- // Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- unsigned Opcode = Op.getOpcode();
- unsigned NumElems = VT.getVectorNumElements();
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), NumElems / 2);
- SDValue Lo0 = extract128BitVector(Op0, 0, DAG, dl);
- SDValue Lo1 = extract128BitVector(Op1, 0, DAG, dl);
- SDValue Hi0 = extract128BitVector(Op0, NumElems / 2, DAG, dl);
- SDValue Hi1 = extract128BitVector(Op1, NumElems / 2, DAG, dl);
- SDValue Lo = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Lo0, Lo1);
- SDValue Hi = DAG.getNode(Opcode, dl, DAG.getVTList(HalfVT, HalfVT), Hi0, Hi1);
- SDValue Ops[] = {
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(0), Hi.getValue(0)),
- DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo.getValue(1), Hi.getValue(1))
- };
- return DAG.getMergeValues(Ops, dl);
- }
-
- assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
- (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
- (VT == MVT::v16i32 && Subtarget.hasAVX512()));
-
- int NumElts = VT.getVectorNumElements();
-
- // PMULxD operations multiply each even value (starting at 0) of LHS with
- // the related value of RHS and produce a widen result.
- // E.g., PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- //
- // In other word, to have all the results, we need to perform two PMULxD:
- // 1. one with the even values.
- // 2. one with the odd values.
- // To achieve #2, with need to place the odd values at an even position.
- //
- // Place the odd value at an even position (basically, shift all values 1
- // step to the left):
- const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1};
- // <a|b|c|d> => <b|undef|d|undef>
- SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0,
- makeArrayRef(&Mask[0], NumElts));
- // <e|f|g|h> => <f|undef|h|undef>
- SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1,
- makeArrayRef(&Mask[0], NumElts));
-
- // Emit two multiplies, one for the lower 2 ints and one for the higher 2
- // ints.
- MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
- bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
- unsigned Opcode =
- (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
- // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
- // => <2 x i64> <ae|cg>
- SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Op0),
- DAG.getBitcast(MulVT, Op1)));
- // PMULUDQ <4 x i32> <b|undef|d|undef>, <4 x i32> <f|undef|h|undef>
- // => <2 x i64> <bf|dh>
- SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
- DAG.getBitcast(MulVT, Odd0),
- DAG.getBitcast(MulVT, Odd1)));
-
- // Shuffle it back into the right order.
- SmallVector<int, 16> HighMask(NumElts);
- SmallVector<int, 16> LowMask(NumElts);
- for (int i = 0; i != NumElts; ++i) {
- HighMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;
- LowMask[i] = (i / 2) * 2 + ((i % 2) * NumElts);
- }
-
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
-
- // If we have a signed multiply but no PMULDQ fix up the high parts of a
- // unsigned multiply.
- if (IsSigned && !Subtarget.hasSSE41()) {
- SDValue ShAmt = DAG.getConstant(
- 31, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
- SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
- SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
-
- SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
- Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
- }
-
- // The first result of MUL_LOHI is actually the low value, followed by the
- // high value.
- SDValue Ops[] = {Lows, Highs};
- return DAG.getMergeValues(Ops, dl);
-}
-
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
@@ -25579,8 +25548,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
- case ISD::UMUL_LOHI:
- case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::ROTL:
case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
OpenPOWER on IntegriCloud