summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-11-15 18:59:31 +0000
committerCraig Topper <craig.topper@intel.com>2018-11-15 18:59:31 +0000
commit73bb04ab6ff0efdfc38cc2c9fa176f6ee28700f7 (patch)
treed8141e832d326fd728e18715cd400d3bb4648794 /llvm/lib
parentfc3163b67a87e93e8beda976ccd16418ca879284 (diff)
downloadbcm5719-llvm-73bb04ab6ff0efdfc38cc2c9fa176f6ee28700f7.tar.gz
bcm5719-llvm-73bb04ab6ff0efdfc38cc2c9fa176f6ee28700f7.zip
[X86] Add -x86-experimental-vector-widening support to reduceVMULWidth and combineMulToPMADDWD
In reduceVMULWidth, we no longer need to worry about extending the vector to 128 bits first. Regular widening of extends, muls and shuffles will take care of that for us. In combineMulToPMADDWD, we can handle v2i32 multiplies and allow the VPMADDWD to be widened to v4i32 during type legalization by adding custom widening like we do have for AVG/ADDUS/SUBUS. I had to modify that code a little to allow different and output VTs. Differential Revision: https://reviews.llvm.org/D54512 llvm-svn: 346980
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp36
1 files changed, 22 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a3f61b5ee35..ecdabf6ce2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26128,30 +26128,35 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case X86ISD::VPMADDWD:
case X86ISD::ADDUS:
case X86ISD::SUBUS:
case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG/ADDUS/SUBUS by widening.
+ // Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- auto InVT = N->getValueType(0);
- assert(InVT.getSizeInBits() < 128);
- assert(128 % InVT.getSizeInBits() == 0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = N->getOperand(0).getValueType();
+ assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 &&
+ "Expected a VT that divides into 128 bits.");
unsigned NumConcat = 128 / InVT.getSizeInBits();
- EVT RegVT = EVT::getVectorVT(*DAG.getContext(),
- InVT.getVectorElementType(),
- NumConcat * InVT.getVectorNumElements());
+ EVT InWideVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(),
+ NumConcat * InVT.getVectorNumElements());
+ EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ NumConcat * VT.getVectorNumElements());
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = N->getOperand(0);
- SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
Ops[0] = N->getOperand(1);
- SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops);
- SDValue Res = DAG.getNode(N->getOpcode(), dl, RegVT, InVec0, InVec1);
- if (getTypeAction(*DAG.getContext(), InVT) != TypeWidenVector)
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1);
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
Results.push_back(Res);
return;
@@ -34431,7 +34436,8 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);
- if (NumElts >= OpsVT.getVectorNumElements()) {
+ if (ExperimentalVectorWideningLegalization ||
+ NumElts >= OpsVT.getVectorNumElements()) {
// Generate the lower part of mul: pmullw. For MULU8/MULS8, only the
// lower part is needed.
SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
@@ -34620,8 +34626,10 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Make sure the vXi16 type is legal. This covers the AVX512 without BWI case.
+ // Also allow v2i32 if it will be widened.
MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
- if (!DAG.getTargetLoweringInfo().isTypeLegal(WVT))
+ if (!((ExperimentalVectorWideningLegalization && VT == MVT::v2i32) ||
+ DAG.getTargetLoweringInfo().isTypeLegal(WVT)))
return SDValue();
SDValue N0 = N->getOperand(0);
OpenPOWER on IntegriCloud