summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp')
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 39
1 file changed, 29 insertions, 10 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 1d233c453b4..2d0098b392f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
@@ -546,18 +547,36 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
}
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
- unsigned codeAddrSpace, const DataLayout &DL) {
- if (!Subtarget.hasLDG() || codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL) {
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
return false;
}
- // Check whether load operates on a readonly argument.
- bool canUseLDG = false;
- if (const Argument *A = dyn_cast<const Argument>(
- GetUnderlyingObject(N->getMemOperand()->getValue(), DL)))
- canUseLDG = A->onlyReadsMemory() && A->hasNoAliasAttr();
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
+ SmallVector<Value *, 8> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+ Objs, F->getDataLayout());
+ for (Value *Obj : Objs) {
+ auto *A = dyn_cast<const Argument>(Obj);
+ if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
+ }
- return canUseLDG;
+ return true;
}
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
@@ -654,7 +673,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD);
- if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, CurDAG->getDataLayout())) {
+ if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
return SelectLDGLDU(N);
}
@@ -892,7 +911,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
- if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, CurDAG->getDataLayout())) {
+ if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
return SelectLDGLDU(N);
}
OpenPOWER on IntegriCloud