diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-09-26 16:43:57 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-26 16:43:57 +0000 |
| commit | f51913155c54eefd6ddb5baad291e8935b98be3a (patch) | |
| tree | a36afc97757985178a5d81e3f7de0c94b1a8c7c1 /llvm/lib | |
| parent | 5ec3893b3a307c8b0dd210dcacf68830d2e66da4 (diff) | |
| download | bcm5719-llvm-f51913155c54eefd6ddb5baad291e8935b98be3a.tar.gz bcm5719-llvm-f51913155c54eefd6ddb5baad291e8935b98be3a.zip | |
[X86] Add support for v16i32 UMUL_LOHI/SMUL_LOHI
Summary: This patch extends the existing v4i32/v8i32 custom lowering of UMUL_LOHI/SMUL_LOHI to also support v16i32.
Reviewers: zvi, RKSimon
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38274
llvm-svn: 314221
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 37 |
1 files changed, 20 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 02e8f9d7cb4..23c0ce32432 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1298,6 +1298,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::MUL, MVT::v8i64, Custom); + setOperationAction(ISD::MUL, MVT::v16i32, Legal); + + setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom); + setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); @@ -1306,7 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); - setOperationAction(ISD::MUL, MVT::v16i32, Legal); // NonVLX sub-targets extend 128/256 vectors to use the 512 version. setOperationAction(ISD::ABS, MVT::v4i64, Legal); @@ -21800,7 +21803,10 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget, } assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) || - (VT == MVT::v8i32 && Subtarget.hasInt256())); + (VT == MVT::v8i32 && Subtarget.hasInt256()) || + (VT == MVT::v16i32 && Subtarget.hasAVX512())); + + int NumElts = VT.getVectorNumElements(); // PMULxD operations multiply each even value (starting at 0) of LHS with // the related value of RHS and produce a widen result. 
@@ -21814,17 +21820,17 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget, // // Place the odd value at an even position (basically, shift all values 1 // step to the left): - const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1}; + const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1}; // <a|b|c|d> => <b|undef|d|undef> SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, - makeArrayRef(&Mask[0], VT.getVectorNumElements())); + makeArrayRef(&Mask[0], NumElts)); // <e|f|g|h> => <f|undef|h|undef> SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, - makeArrayRef(&Mask[0], VT.getVectorNumElements())); + makeArrayRef(&Mask[0], NumElts)); // Emit two multiplies, one for the lower 2 ints and one for the higher 2 // ints. - MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64; + MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2); bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI; unsigned Opcode = (!IsSigned || !Subtarget.hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ; @@ -21836,19 +21842,16 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget &Subtarget, SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, Odd0, Odd1)); // Shuffle it back into the right order. 
- SDValue Highs, Lows; - if (VT == MVT::v8i32) { - const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15}; - Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); - const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14}; - Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); - } else { - const int HighMask[] = {1, 5, 3, 7}; - Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); - const int LowMask[] = {0, 4, 2, 6}; - Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + SmallVector<int, 16> HighMask(NumElts); + SmallVector<int, 16> LowMask(NumElts); + for (int i = 0; i != NumElts; ++i) { + HighMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1; + LowMask[i] = (i / 2) * 2 + ((i % 2) * NumElts); } + SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + // If we have a signed multiply but no PMULDQ fix up the high parts of a // unsigned multiply. if (IsSigned && !Subtarget.hasSSE41()) { |

