summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-10-06 21:11:45 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-10-06 21:11:45 +0000
commit: b4ba3cbda01e710e64948f43cbf9bfdec5ec5855 (patch)
tree: 7f12d5d11011e085eb15e5125e1c13a624f0e520 /llvm/lib
parent: d84cd7caa87e8486a131c76f45595535678c6552 (diff)
download: bcm5719-llvm-b4ba3cbda01e710e64948f43cbf9bfdec5ec5855.tar.gz
download: bcm5719-llvm-b4ba3cbda01e710e64948f43cbf9bfdec5ec5855.zip
[X86][AVX] Access a scalar float/double as a free extract from a broadcast load (PR43217)
If an FP scalar is loaded and then used as both a scalar and a vector broadcast, perform the load as a broadcast and then extract the scalar for 'free' from the 0th element. This involved switching the order of the X86ISD::VBROADCAST combines so we only convert to X86ISD::VBROADCAST_LOAD once all other canonicalizations have been attempted. Adds a DAGCombinerInfo::recursivelyDeleteUnusedNodes wrapper. Fixes PR43217. Differential Revision: https://reviews.llvm.org/D68544. llvm-svn: 373871
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 35
2 files changed, 29 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 38fd9742d2d..7ea908437ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -761,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+ return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 784bf6d58c5..915046048ff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33429,8 +33429,19 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+ // Share broadcast with the longest vector and extract low subvector (free).
+ for (SDNode *User : Src->uses())
+ if (User != N.getNode() &&
+ (User->getOpcode() == X86ISD::VBROADCAST ||
+ User->getOpcode() == X86ISD::VBROADCAST_LOAD) &&
+ User->getValueSizeInBits(0) > VT.getSizeInBits()) {
+ return extractSubVector(SDValue(User, 0), 0, DAG, DL,
+ VT.getSizeInBits());
+ }
+
// vbroadcast(scalarload X) -> vbroadcast_load X
- if (!SrcVT.isVector() && Src.hasOneUse() &&
+ // For float loads, extract other uses of the scalar from the broadcast.
+ if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) &&
ISD::isNormalLoad(Src.getNode())) {
LoadSDNode *LN = cast<LoadSDNode>(Src);
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
@@ -33438,17 +33449,19 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
SDValue BcastLd =
DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
- return BcastLd;
- }
-
- // Share broadcast with the longest vector and extract low subvector (free).
- for (SDNode *User : Src->uses())
- if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
- User->getValueSizeInBits(0) > VT.getSizeInBits()) {
- return extractSubVector(SDValue(User, 0), 0, DAG, DL,
- VT.getSizeInBits());
+ // If the load value is used only by N, replace it via CombineTo N.
+ bool NoReplaceExtract = Src.hasOneUse();
+ DCI.CombineTo(N.getNode(), BcastLd);
+ if (NoReplaceExtract) {
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1));
+ DCI.recursivelyDeleteUnusedNodes(LN);
+ } else {
+ SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd,
+ DAG.getIntPtrConstant(0, DL));
+ DCI.CombineTo(LN, Scl, BcastLd.getValue(1));
}
+ return N; // Return N so it doesn't get rechecked!
+ }
return SDValue();
}
OpenPOWER on IntegriCloud