summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG
diff options
context:
space:
mode:
authorUlrich Weigand <ulrich.weigand@de.ibm.com>2020-01-13 14:37:07 +0100
committerUlrich Weigand <ulrich.weigand@de.ibm.com>2020-01-13 14:38:49 +0100
commit04a86966fbf46809d7a165b1f089e4d076f0f8a5 (patch)
tree29647c03311d2de4be6d3aad73bd80d3d6b22ed7 /llvm/lib/CodeGen/SelectionDAG
parentd7d88b9d8b3efd8b4b07074aa64b5b4136a35b2c (diff)
downloadbcm5719-llvm-04a86966fbf46809d7a165b1f089e4d076f0f8a5.tar.gz
bcm5719-llvm-04a86966fbf46809d7a165b1f089e4d076f0f8a5.zip
[FPEnv] Fix chain handling for fpexcept.strict nodes
We need to ensure that fpexcept.strict nodes are not optimized away even if the result is unused. To do that, we need to chain them into the block's terminator nodes, as is already done for PendingExports. This patch adds two new lists of pending chains, PendingConstrainedFP and PendingConstrainedFPStrict, to hold constrained FP intrinsic nodes without and with fpexcept.strict markers, respectively. This not only solves the above problem, but also allows chains to be relaxed a bit further by no longer flushing all FP nodes before a store or other memory access. (They are still flushed before nodes with other side effects.) Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D72341
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp72
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h23
2 files changed, 81 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0e749f2e28e..c3fbdf0d763 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1025,6 +1025,8 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
@@ -1035,7 +1037,7 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-SDValue SelectionDAGBuilder::getRoot() {
+SDValue SelectionDAGBuilder::getMemoryRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
@@ -1053,9 +1055,31 @@ SDValue SelectionDAGBuilder::getRoot() {
return Root;
}
+SDValue SelectionDAGBuilder::getRoot() {
+ // Chain up all pending constrained intrinsics together with all
+ // pending loads, by simply appending them to PendingLoads and
+ // then calling getMemoryRoot().
+ PendingLoads.reserve(PendingLoads.size() +
+ PendingConstrainedFP.size() +
+ PendingConstrainedFPStrict.size());
+ PendingLoads.append(PendingConstrainedFP.begin(),
+ PendingConstrainedFP.end());
+ PendingLoads.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
+ return getMemoryRoot();
+}
+
SDValue SelectionDAGBuilder::getControlRoot() {
SDValue Root = DAG.getRoot();
+ // We need to emit pending fpexcept.strict constrained intrinsics,
+ // so append them to the PendingExports list.
+ PendingExports.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFPStrict.clear();
+
if (PendingExports.empty())
return Root;
@@ -4060,9 +4084,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (isVolatile || NumValues > MaxParallelChains)
+ if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
+ else if (NumValues > MaxParallelChains)
+ Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
@@ -4237,7 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
- SDValue Root = getRoot();
+ SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
unsigned Alignment = I.getAlignment();
@@ -4329,7 +4355,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo);
SDValue StoreNode =
- DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
+ DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
@@ -4463,7 +4489,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
+ SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType);
DAG.setRoot(Scatter);
@@ -5850,7 +5876,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
@@ -5866,7 +5893,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
- SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
return;
@@ -5884,7 +5912,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
- SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
@@ -7039,9 +7068,29 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
assert(Result.getNode()->getNumValues() == 2);
- // See above -- chain is handled like for loads here.
+
+ // Push node to the appropriate list so that future instructions can be
+ // chained up correctly.
SDValue OutChain = Result.getValue(1);
- PendingLoads.push_back(OutChain);
+ switch (FPI.getExceptionBehavior().getValue()) {
+ case fp::ExceptionBehavior::ebIgnore:
+ // The only reason why ebIgnore nodes still need to be chained is that
+ // they might depend on the current rounding mode, and therefore must
+ // not be moved across instructions that may change that mode.
+ LLVM_FALLTHROUGH;
+ case fp::ExceptionBehavior::ebMayTrap:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks.
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks or read floating-point exception flags.
+ // In addition, they cannot be optimized out even if unused.
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
@@ -7424,7 +7473,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol,
false, /*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 1579ef3ad75..12770c8fb2d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -143,6 +143,17 @@ private:
/// tokenfactor for them just before terminator instructions.
SmallVector<SDValue, 8> PendingExports;
+ /// Similar to loads, nodes corresponding to constrained FP intrinsics are
+ /// bunched up and emitted when necessary. These can be moved across each
+ /// other and any (normal) memory operation (load or store), but not across
+ /// calls or instructions having unspecified side effects. As a special
+ /// case, constrained FP intrinsics using fpexcept.strict may not be deleted
+ /// even if otherwise unused, so they need to be chained before any
+ /// terminator instruction (like PendingExports). We track the latter
+ /// set of nodes in a separate list.
+ SmallVector<SDValue, 8> PendingConstrainedFP;
+ SmallVector<SDValue, 8> PendingConstrainedFPStrict;
+
/// A unique monotonically increasing number used to order the SDNodes we
/// create.
unsigned SDNodeOrder;
@@ -447,12 +458,18 @@ public:
/// Return the current virtual root of the Selection DAG, flushing any
/// PendingLoad items. This must be done before emitting a store or any other
- /// node that may need to be ordered after any prior load instructions.
+ /// memory node that may need to be ordered after any prior load instructions.
+ SDValue getMemoryRoot();
+
+ /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)
+ /// items. This must be done before emitting any call or any other node
+ /// that may need to be ordered after FP instructions due to other side
+ /// effects.
SDValue getRoot();
/// Similar to getRoot, but instead of flushing all the PendingLoad items,
- /// flush all the PendingExports items. It is necessary to do this before
- /// emitting a terminator instruction.
+ /// flush all the PendingExports (and PendingConstrainedFPStrict) items.
+ /// It is necessary to do this before emitting a terminator instruction.
SDValue getControlRoot();
SDLoc getCurSDLoc() const {
OpenPOWER on IntegriCloud