summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2019-08-26 18:23:26 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-08-26 18:23:26 +0000
commit: 36d1588f017bb6e971cb14cc6e7094c3db9c0436 (patch)
tree: 1a3fa37ca359557e1cf523e99ac2e47d51ddfd25 /llvm/lib
parent: ac1d5986c836924896aeb934ff71432f80c70063 (diff)
download: bcm5719-llvm-36d1588f017bb6e971cb14cc6e7094c3db9c0436.tar.gz
bcm5719-llvm-36d1588f017bb6e971cb14cc6e7094c3db9c0436.zip
[X86] Add a hack to combinePMULDQ to manually turn SIGN_EXTEND_VECTOR_INREG/ZERO_EXTEND_VECTOR_INREG inputs into an ANY_EXTEND_VECTOR_INREG style shuffle
ANY_EXTEND_VECTOR_INREG isn't currently marked Legal, which prevents SimplifyDemandedBits from turning SIGN/ZERO_EXTEND_VECTOR_INREG into it after op legalization. And even if we did make it Legal, combineExtInVec doesn't do shuffle combining on the VECTOR_INREG nodes until AVX1.

This patch adds a quick hack to combinePMULDQ to directly emit a vector shuffle corresponding to an ANY_EXTEND_VECTOR_INREG operation. This avoids both of those issues without creating any other regressions on our tests.

The xop-ifma.ll change here also showed up when I tried to resurrect D56306 and seemed to be the only improvement that patch creates now. This is a more direct way to get the benefit.

Differential Revision: https://reviews.llvm.org/D66436

llvm-svn: 369942
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 28
1 files changed, 28 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f654f1d7a72..9f6a0b76e06 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44659,6 +44659,34 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
return SDValue(N, 0);
+ // If the input is an extend_invec and the SimplifyDemandedBits call didn't
+ // convert it to any_extend_invec, due to the LegalOperations check, do the
+ // conversion directly to a vector shuffle manually. This exposes combine
+ // opportunities missed by combineExtInVec not calling
+ // combineX86ShufflesRecursively on SSE4.1 targets.
+ // FIXME: This is basically a hack around several other issues related to
+ // ANY_EXTEND_VECTOR_INREG.
+ if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ LHS.getOperand(0).getValueType() == MVT::v4i32) {
+ SDLoc dl(N);
+ LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
+ LHS.getOperand(0), { 0, -1, 1, -1 });
+ LHS = DAG.getBitcast(MVT::v2i64, LHS);
+ return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
+ }
+ if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() &&
+ (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ RHS.getOperand(0).getValueType() == MVT::v4i32) {
+ SDLoc dl(N);
+ RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
+ RHS.getOperand(0), { 0, -1, 1, -1 });
+ RHS = DAG.getBitcast(MVT::v2i64, RHS);
+ return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
+ }
+
return SDValue();
}
OpenPOWER on IntegriCloud