summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2016-09-11 01:39:04 +0000
committer: Justin Lebar <jlebar@google.com> 2016-09-11 01:39:04 +0000
commit: 6d6b11a4a6a418956ac963bb9a7d9b1fd2966e3d (patch)
tree: e2efeffcdeaa94a2862c48bccc90db70310f7488 /llvm/lib
parent: adbf09e8cfd3aa6bb104f45bf5f39e4e8578d2f8 (diff)
downloadbcm5719-llvm-6d6b11a4a6a418956ac963bb9a7d9b1fd2966e3d.tar.gz
bcm5719-llvm-6d6b11a4a6a418956ac963bb9a7d9b1fd2966e3d.zip
[NVPTX] Use ldg for explicitly invariant loads.
Summary:
With this change (plus some changes to prevent !invariant from being clobbered within llvm), clang will be able to model the __ldg CUDA builtin as an invariant load, rather than as a target-specific llvm intrinsic. This will let the optimizer play with these loads -- specifically, we should be able to vectorize them in the load-store vectorizer.

Reviewers: tra

Subscribers: jholewinski, hfinkel, llvm-commits, chandlerc

Differential Revision: https://reviews.llvm.org/D23477

llvm-svn: 281152
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp 35
1 files changed, 22 insertions, 13 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 14aa3f15f5c..7ab15ee94cf 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -558,21 +558,30 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
- // To use non-coherent caching, the load has to be from global
- // memory and we have to prove that the memory area is not written
- // to anywhere for the duration of the kernel call, not even after
- // the load.
+ // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
+ // space.
//
- // To ensure that there are no writes to the memory, we require the
- // underlying pointer to be a noalias (__restrict) kernel parameter
- // that is never used for a write. We can only do this for kernel
- // functions since from within a device function, we cannot know if
- // there were or will be writes to the memory from the caller - or we
- // could, but then we would have to do inter-procedural analysis.
- if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
- !isKernelFunction(*F->getFunction())) {
+ // We have two ways of identifying invariant loads: Loads may be explicitly
+ // marked as invariant, or we may infer them to be invariant.
+ //
+ // We currently infer invariance only for kernel function pointer params that
+ // are noalias (i.e. __restrict) and never written to.
+ //
+ // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
+ // not during the SelectionDAG phase).
+ //
+ // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
+ // explicitly invariant loads because these are how clang tells us to use ldg
+ // when the user uses a builtin.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
+ return false;
+
+ if (N->isInvariant())
+ return true;
+
+ // Load wasn't explicitly invariant. Attempt to infer invariance.
+ if (!isKernelFunction(*F->getFunction()))
return false;
- }
// We use GetUnderlyingObjects() here instead of
// GetUnderlyingObject() mainly because the former looks through phi
OpenPOWER on IntegriCloud