diff options
Diffstat (limited to 'llvm/lib/Target/NVPTX')
| -rw-r--r-- | llvm/lib/Target/NVPTX/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTX.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVVMIntrRange.cpp | 154 |
4 files changed, 159 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index ed43296b870..b67c4050086 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -32,6 +32,7 @@ set(NVPTXCodeGen_sources NVPTXTargetMachine.cpp NVPTXTargetTransformInfo.cpp NVPTXUtilities.cpp + NVVMIntrRange.cpp NVVMReflect.cpp ) diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 15a4205f6aa..e91385ac13f 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -47,6 +47,7 @@ ModulePass *createNVPTXAssignValidGlobalNamesPass(); ModulePass *createGenericToNVVMPass(); FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass(); FunctionPass *createNVPTXInferAddressSpacesPass(); +FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion); FunctionPass *createNVVMReflectPass(); FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 583baad65c9..b9f5919964c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -55,6 +55,7 @@ static cl::opt<bool> UseInferAddressSpaces( "NVPTXFavorNonGenericAddrSpaces")); namespace llvm { +void initializeNVVMIntrRangePass(PassRegistry&); void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); @@ -75,6 +76,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { // but it's very NVPTX-specific. PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeNVVMReflectPass(PR); + initializeNVVMIntrRangePass(PR); initializeGenericToNVVMPass(PR); initializeNVPTXAllocaHoistingPass(PR); initializeNVPTXAssignValidGlobalNamesPass(PR); @@ -176,6 +178,7 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) { PM.add(createNVVMReflectPass()); + PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion())); } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { diff --git a/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp new file mode 100644 index 00000000000..09f328d4c53 --- /dev/null +++ b/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp @@ -0,0 +1,154 @@ +//===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds appropriate !range metadata for calls to NVVM +// intrinsics that return a limited range of values. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Instructions.h" + +using namespace llvm; + +#define DEBUG_TYPE "nvvm-intr-range" + +namespace llvm { void initializeNVVMIntrRangePass(PassRegistry &); } + +// Add !range metadata based on limits of given SM variant. +static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20), + cl::Hidden, cl::desc("SM variant")); + +namespace { +class NVVMIntrRange : public FunctionPass { + private: + struct { + unsigned x, y, z; + } MaxBlockSize, MaxGridSize; + + public: + static char ID; + NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {} + NVVMIntrRange(unsigned int SmVersion) + : FunctionPass(ID), MaxBlockSize{1024, 1024, 64}, + MaxGridSize{SmVersion >= 30 ? 0x7fffffffu : 0xffffu, 0xffff, 0xffff} { + initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &) override; +}; +} + +FunctionPass *llvm::createNVVMIntrRangePass(unsigned int SmVersion) { + return new NVVMIntrRange(SmVersion); +} + +char NVVMIntrRange::ID = 0; +INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range", + "Add !range metadata to NVVM intrinsics.", false, false) + +// Adds the passed-in [Low,High) range information as metadata to the +// passed-in call instruction. +static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) { + LLVMContext &Context = C->getParent()->getContext(); + IntegerType *Int32Ty = Type::getInt32Ty(Context); + Metadata *LowAndHigh[] = { + ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Low)), + ConstantAsMetadata::get(ConstantInt::get(Int32Ty, High))}; + C->setMetadata(LLVMContext::MD_range, MDNode::get(Context, LowAndHigh)); + return true; +} + +bool NVVMIntrRange::runOnFunction(Function &F) { + // Go through the calls in this function. + bool Changed = false; + for (Instruction &I : instructions(F)) { + CallInst *Call = dyn_cast<CallInst>(&I); + if (!Call) + continue; + + if (Function *Callee = Call->getCalledFunction()) { + switch (Callee->getIntrinsicID()) { + // Index within block + case Intrinsic::ptx_read_tid_x: + case Intrinsic::nvvm_read_ptx_sreg_tid_x: + Changed |= addRangeMetadata(0, MaxBlockSize.x, Call); + break; + case Intrinsic::ptx_read_tid_y: + case Intrinsic::nvvm_read_ptx_sreg_tid_y: + Changed |= addRangeMetadata(0, MaxBlockSize.y, Call); + break; + case Intrinsic::ptx_read_tid_z: + case Intrinsic::nvvm_read_ptx_sreg_tid_z: + Changed |= addRangeMetadata(0, MaxBlockSize.z, Call); + break; + + // Block size + case Intrinsic::ptx_read_ntid_x: + case Intrinsic::nvvm_read_ptx_sreg_ntid_x: + Changed |= addRangeMetadata(1, MaxBlockSize.x+1, Call); + break; + case Intrinsic::ptx_read_ntid_y: + case Intrinsic::nvvm_read_ptx_sreg_ntid_y: + Changed |= addRangeMetadata(1, MaxBlockSize.y+1, Call); + break; + case Intrinsic::ptx_read_ntid_z: + case Intrinsic::nvvm_read_ptx_sreg_ntid_z: + Changed |= addRangeMetadata(1, MaxBlockSize.z+1, Call); + break; + + // Index within grid + case Intrinsic::ptx_read_ctaid_x: + case Intrinsic::nvvm_read_ptx_sreg_ctaid_x: + Changed |= addRangeMetadata(0, MaxGridSize.x, Call); + break; + case Intrinsic::ptx_read_ctaid_y: + case Intrinsic::nvvm_read_ptx_sreg_ctaid_y: + Changed |= addRangeMetadata(0, MaxGridSize.y, Call); + break; + case Intrinsic::ptx_read_ctaid_z: + case Intrinsic::nvvm_read_ptx_sreg_ctaid_z: + Changed |= addRangeMetadata(0, MaxGridSize.z, Call); + break; + + // Grid size + case Intrinsic::ptx_read_nctaid_x: + case Intrinsic::nvvm_read_ptx_sreg_nctaid_x: + Changed |= addRangeMetadata(1, MaxGridSize.x+1, Call); + break; + case Intrinsic::ptx_read_nctaid_y: + case Intrinsic::nvvm_read_ptx_sreg_nctaid_y: + Changed |= addRangeMetadata(1, MaxGridSize.y+1, Call); + break; + case Intrinsic::ptx_read_nctaid_z: + case Intrinsic::nvvm_read_ptx_sreg_nctaid_z: + Changed |= addRangeMetadata(1, MaxGridSize.z+1, Call); + break; + + // warp size is constant 32. + case Intrinsic::nvvm_read_ptx_sreg_warpsize: + Changed |= addRangeMetadata(32, 32+1, Call); + break; + + // Lane ID is [0..warpsize) + case Intrinsic::ptx_read_laneid: + Changed |= addRangeMetadata(0, 32, Call); + break; + + default: + break; + } + } + } + + return Changed; +} |

