summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-17 16:29:46 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-17 16:29:46 +0000
commitb0e986f88e4db23f1e2613b9e80e496e689596d7 (patch)
tree093ef74fcce223aa6da635961234657ed4edc115 /llvm/lib/Target/X86
parente930f569f778a6d011fcec42970a7a6f5b1d5b80 (diff)
downloadbcm5719-llvm-b0e986f88e4db23f1e2613b9e80e496e689596d7.tar.gz
bcm5719-llvm-b0e986f88e4db23f1e2613b9e80e496e689596d7.zip
[X86] Pass the parent SDNode to X86DAGToDAGISel::selectScalarSSELoad to simplify the hasSingleUsesFromRoot handling.
Some of the calls to hasSingleUsesFromRoot were passing the load itself. If the load's chain result has a user, this would count against that. By getting the true parent of the match and ensuring any intermediate nodes between the match and the load have a single use, we can avoid this case. isLegalToFold will take care of checking users of the load's data output. This fixed at least fma-scalar-memfold.ll to succeed without the peephole pass. llvm-svn: 334908
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp22
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td4
2 files changed, 12 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9fb40ce6857..41c15b737b2 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -215,7 +215,7 @@ namespace {
bool selectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool selectScalarSSELoad(SDNode *Root, SDValue N,
+ bool selectScalarSSELoad(SDNode *Root, SDNode *Parent, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
@@ -1685,8 +1685,7 @@ bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
// We can only fold a load if all nodes between it and the root node have a
// single use. If there are additional uses, we could end up duplicating the
// load.
-static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
- SDNode *User = *N->use_begin();
+static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *User) {
while (User != Root) {
if (!User->hasOneUse())
return false;
@@ -1703,17 +1702,19 @@ static bool hasSingleUsesFromRoot(SDNode *Root, SDNode *N) {
/// We also return:
/// PatternChainNode: this is the matched node that has a chain input and
/// output.
-bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
+bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDNode *Parent,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
SDValue &PatternNodeWithChain) {
+ if (!hasSingleUsesFromRoot(Root, Parent))
+ return false;
+
// We can allow a full vector load here since narrowing a load is ok.
if (ISD::isNON_EXTLoad(N.getNode())) {
PatternNodeWithChain = N;
if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
- hasSingleUsesFromRoot(Root, N.getNode())) {
+ IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
Segment);
@@ -1724,8 +1725,7 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
PatternNodeWithChain = N;
if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel) &&
- hasSingleUsesFromRoot(Root, N.getNode())) {
+ IsLegalToFold(PatternNodeWithChain, Parent, Root, OptLevel)) {
auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
Segment);
@@ -1739,8 +1739,7 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
PatternNodeWithChain = N.getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
- hasSingleUsesFromRoot(Root, N.getNode())) {
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
Segment);
@@ -1756,8 +1755,7 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
PatternNodeWithChain = N.getOperand(0).getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
- IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel) &&
- hasSingleUsesFromRoot(Root, N.getNode())) {
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index ee72a7231e3..91b2a568f4d 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -651,10 +651,10 @@ def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>;
// forms.
def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot]>;
+ SDNPWantRoot, SDNPWantParent]>;
def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
- SDNPWantRoot]>;
+ SDNPWantRoot, SDNPWantParent]>;
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
OpenPOWER on IntegriCloud