summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/NVPTX
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/NVPTX')
-rw-r--r-- llvm/lib/Target/NVPTX/CMakeLists.txt 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTX.h 1
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp 3
-rw-r--r-- llvm/lib/Target/NVPTX/NVVMIntrRange.cpp 154
4 files changed, 159 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index ed43296b870..b67c4050086 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -32,6 +32,7 @@ set(NVPTXCodeGen_sources
NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
+ NVVMIntrRange.cpp
NVVMReflect.cpp
)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 15a4205f6aa..e91385ac13f 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -47,6 +47,7 @@ ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
FunctionPass *createNVPTXInferAddressSpacesPass();
+FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass();
FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 583baad65c9..b9f5919964c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -55,6 +55,7 @@ static cl::opt<bool> UseInferAddressSpaces(
"NVPTXFavorNonGenericAddrSpaces"));
namespace llvm {
+void initializeNVVMIntrRangePass(PassRegistry&);
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
@@ -75,6 +76,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeNVVMReflectPass(PR);
+ initializeNVVMIntrRangePass(PR);
initializeGenericToNVVMPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
@@ -176,6 +178,7 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
PM.add(createNVVMReflectPass());
+ PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
diff --git a/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
new file mode 100644
index 00000000000..09f328d4c53
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -0,0 +1,154 @@
+//===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass adds appropriate !range metadata for calls to NVVM
+// intrinsics that return a limited range of values.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvvm-intr-range"
+
+namespace llvm { void initializeNVVMIntrRangePass(PassRegistry &); } // forward declaration; invoked from the NVVMIntrRange constructor
+
+// Hidden option "nvvm-intr-range-sm" (default 20): SM variant whose limits the default-constructed pass uses for !range metadata.
+static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
+ cl::Hidden, cl::desc("SM variant")); // NOTE(review): no CommandLine header is included in this file; cl:: presumably arrives transitively — confirm
+
+namespace {
+class NVVMIntrRange : public FunctionPass { // function pass that attaches !range metadata to calls of NVVM/PTX special-register intrinsics
+ private:
+ struct {
+ unsigned x, y, z; // one limit per dimension
+ } MaxBlockSize, MaxGridSize; // MaxBlockSize bounds tid/ntid ranges; MaxGridSize bounds ctaid/nctaid ranges (see runOnFunction)
+
+ public:
+ static char ID; // pass identification, handed to FunctionPass(ID)
+ NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {} // default: SM variant taken from the -nvvm-intr-range-sm option
+ NVVMIntrRange(unsigned int SmVersion) // NOTE(review): single-argument constructor is not 'explicit'
+ : FunctionPass(ID), MaxBlockSize{1024, 1024, 64},
+ MaxGridSize{SmVersion >= 30 ? 0x7fffffffu : 0xffffu, 0xffff, 0xffff} { // only the x grid limit depends on SmVersion
+ initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &) override; // defined out of line, after addRangeMetadata
+};
+} // end anonymous namespace
+
+FunctionPass *llvm::createNVVMIntrRangePass(unsigned int SmVersion) { // factory declared in NVPTX.h; SmVersion selects the grid x limit
+ return new NVVMIntrRange(SmVersion); // heap-allocated; presumably owned by the pass manager it is added to — confirm
+}
+
+char NVVMIntrRange::ID = 0; // storage for the static pass ID passed to FunctionPass(ID)
+INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
+ "Add !range metadata to NVVM intrinsics.", false, false) // NOTE(review): trailing flags presumably mean cfg-only=false, is-analysis=false — confirm against INITIALIZE_PASS
+
+// Attaches !range metadata {i32 Low, i32 High}, i.e. the half-open interval [Low,High), to
+// the call instruction C. Always returns true, so callers can OR the result into a "changed" flag.
+static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
+ LLVMContext &Context = C->getParent()->getContext();
+ IntegerType *Int32Ty = Type::getInt32Ty(Context); // both bounds are emitted as i32 constants although passed as uint64_t
+ Metadata *LowAndHigh[] = {
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Low)),
+ ConstantAsMetadata::get(ConstantInt::get(Int32Ty, High))};
+ C->setMetadata(LLVMContext::MD_range, MDNode::get(Context, LowAndHigh)); // NOTE(review): no check for pre-existing !range metadata; presumably it is replaced — confirm
+ return true; // unconditional, even if identical metadata was already present
+}
+
+bool NVVMIntrRange::runOnFunction(Function &F) { // returns true iff at least one call in F was annotated
+ // Visit every instruction in F; only CallInsts whose callee is a recognised intrinsic get !range metadata.
+ bool Changed = false;
+ for (Instruction &I : instructions(F)) {
+ CallInst *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+
+ if (Function *Callee = Call->getCalledFunction()) { // calls for which getCalledFunction() is null are left untouched
+ switch (Callee->getIntrinsicID()) {
+ // Index within block: [0, MaxBlockSize.{x,y,z}); both ptx_ and nvvm_ spellings are handled.
+ case Intrinsic::ptx_read_tid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
+ break;
+ case Intrinsic::ptx_read_tid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
+ break;
+ case Intrinsic::ptx_read_tid_z:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
+ break;
+
+ // Block size: [1, MaxBlockSize + 1), i.e. 1..MaxBlockSize inclusive since the upper bound is exclusive.
+ case Intrinsic::ptx_read_ntid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
+ Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call);
+ break;
+ case Intrinsic::ptx_read_ntid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
+ Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call);
+ break;
+ case Intrinsic::ptx_read_ntid_z:
+ case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
+ Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call);
+ break;
+
+ // Index within grid: [0, MaxGridSize.{x,y,z}); the x limit was chosen from SmVersion in the constructor.
+ case Intrinsic::ptx_read_ctaid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
+ Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
+ break;
+ case Intrinsic::ptx_read_ctaid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
+ Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
+ break;
+ case Intrinsic::ptx_read_ctaid_z:
+ case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
+ Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
+ break;
+
+ // Grid size: [1, MaxGridSize + 1), i.e. 1..MaxGridSize inclusive since the upper bound is exclusive.
+ case Intrinsic::ptx_read_nctaid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
+ Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call);
+ break;
+ case Intrinsic::ptx_read_nctaid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
+ Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call);
+ break;
+ case Intrinsic::ptx_read_nctaid_z:
+ case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
+ Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call);
+ break;
+
+ // Warp size is the constant 32, expressed as the single-value range [32, 33). NOTE(review): only the nvvm_ spelling is handled here.
+ case Intrinsic::nvvm_read_ptx_sreg_warpsize:
+ Changed |= addRangeMetadata(32, 32+1, Call);
+ break;
+
+ // Lane ID is [0..warpsize), with warpsize hard-coded as 32. NOTE(review): only the ptx_ spelling is handled — confirm whether an nvvm_ variant exists.
+ case Intrinsic::ptx_read_laneid:
+ Changed |= addRangeMetadata(0, 32, Call);
+ break;
+
+ default: // any other callee (including non-intrinsics) is left unchanged
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
OpenPOWER on IntegriCloud