diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-11-15 18:59:31 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-11-15 18:59:31 +0000 |
| commit | 73bb04ab6ff0efdfc38cc2c9fa176f6ee28700f7 (patch) | |
| tree | d8141e832d326fd728e18715cd400d3bb4648794 /llvm/lib | |
| parent | fc3163b67a87e93e8beda976ccd16418ca879284 (diff) | |
| download | bcm5719-llvm-73bb04ab6ff0efdfc38cc2c9fa176f6ee28700f7.tar.gz bcm5719-llvm-73bb04ab6ff0efdfc38cc2c9fa176f6ee28700f7.zip | |
[X86] Add -x86-experimental-vector-widening support to reduceVMULWidth and combineMulToPMADDWD
In reduceVMULWidth, we no longer need to worry about extending the vector to 128 bits first. Regular widening of extends, muls and shuffles will take care of that for us.
In combineMulToPMADDWD, we can handle v2i32 multiplies and allow the VPMADDWD to be widened to v4i32 during type legalization by adding custom widening like we already have for AVG/ADDUS/SUBUS. I had to modify that code a little to allow different input and output VTs.
Differential Revision: https://reviews.llvm.org/D54512
llvm-svn: 346980
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 36 |
1 files changed, 22 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a3f61b5ee35..ecdabf6ce2d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26128,30 +26128,35 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case X86ISD::VPMADDWD: case X86ISD::ADDUS: case X86ISD::SUBUS: case X86ISD::AVG: { - // Legalize types for X86ISD::AVG/ADDUS/SUBUS by widening. + // Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening. assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); - auto InVT = N->getValueType(0); - assert(InVT.getSizeInBits() < 128); - assert(128 % InVT.getSizeInBits() == 0); + EVT VT = N->getValueType(0); + EVT InVT = N->getOperand(0).getValueType(); + assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && + "Expected a VT that divides into 128 bits."); unsigned NumConcat = 128 / InVT.getSizeInBits(); - EVT RegVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), - NumConcat * InVT.getVectorNumElements()); + EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), + NumConcat * InVT.getVectorNumElements()); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), + VT.getVectorElementType(), + NumConcat * VT.getVectorNumElements()); SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); Ops[0] = N->getOperand(0); - SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops); + SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); Ops[0] = N->getOperand(1); - SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops); + SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); - SDValue Res = DAG.getNode(N->getOpcode(), dl, RegVT, InVec0, InVec1); - if (getTypeAction(*DAG.getContext(), InVT) != TypeWidenVector) - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res, + SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); + if 
(getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); Results.push_back(Res); return; @@ -34431,7 +34436,8 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0); SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1); - if (NumElts >= OpsVT.getVectorNumElements()) { + if (ExperimentalVectorWideningLegalization || + NumElts >= OpsVT.getVectorNumElements()) { // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the // lower part is needed. SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); @@ -34620,8 +34626,10 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, return SDValue(); // Make sure the vXi16 type is legal. This covers the AVX512 without BWI case. + // Also allow v2i32 if it will be widened. MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements()); - if (!DAG.getTargetLoweringInfo().isTypeLegal(WVT)) + if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) || + DAG.getTargetLoweringInfo().isTypeLegal(WVT))) return SDValue(); SDValue N0 = N->getOperand(0); |

