diff options
| author | Artem Belevich <tra@google.com> | 2018-08-03 18:05:24 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2018-08-03 18:05:24 +0000 |
| commit | 0a11b6366a926c731794facfd697371e829fcdc1 (patch) | |
| tree | 3310230aa0198d2d53b4a65c6f5572c15cf898dd | |
| parent | feb2a5886047f5188ba61c44b6f12222bdb83f18 (diff) | |
| download | bcm5719-llvm-0a11b6366a926c731794facfd697371e829fcdc1.tar.gz bcm5719-llvm-0a11b6366a926c731794facfd697371e829fcdc1.zip | |
[NVPTX] Handle __nvvm_reflect("__CUDA_ARCH").
Summary:
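libdevice in recent CUDA versions relies on __nvvm_reflect("__CUDA_ARCH") to
select GPU-specific bitcode. This patch passes the subtarget's SM version to the
NVVMReflect pass, which now replaces that call with the SM version multiplied by
10 (e.g. 350 when compiling with -mcpu=sm_35).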
Reviewers: jlebar
Subscribers: jholewinski, sanjoy, hiraditya, bixia, llvm-commits
Differential Revision: https://reviews.llvm.org/D50207
llvm-svn: 338908
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTX.h | 2 |
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 5 |
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVVMReflect.cpp | 10 |
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll | 21 |
4 files changed, 33 insertions, 5 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 902d1b25e7d..02b8d8fff64 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -46,7 +46,7 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, ModulePass *createNVPTXAssignValidGlobalNamesPass(); ModulePass *createGenericToNVVMPass(); FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion); -FunctionPass *createNVVMReflectPass(); +FunctionPass *createNVVMReflectPass(unsigned int SmVersion); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index a1b160441df..b9cce28726d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -195,7 +195,7 @@ void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { Builder.addExtension( PassManagerBuilder::EP_EarlyAsPossible, [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { - PM.add(createNVVMReflectPass()); + PM.add(createNVVMReflectPass(Subtarget.getSmVersion())); PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion())); }); } @@ -258,7 +258,8 @@ void NVPTXPassConfig::addIRPasses() { // it here does nothing. But since we need it for correctness when lowering // to NVPTX, run it here too, in case whoever built our pass pipeline didn't // call addEarlyAsPossiblePasses. 
- addPass(createNVVMReflectPass()); + const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); + addPass(createNVVMReflectPass(ST.getSmVersion())); if (getOptLevel() != CodeGenOpt::None) addPass(createNVPTXImageOptimizerPass()); diff --git a/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/llvm/lib/Target/NVPTX/NVVMReflect.cpp index 60971b48adf..64c262664fd 100644 --- a/llvm/lib/Target/NVPTX/NVVMReflect.cpp +++ b/llvm/lib/Target/NVPTX/NVVMReflect.cpp @@ -50,7 +50,9 @@ namespace { class NVVMReflect : public FunctionPass { public: static char ID; - NVVMReflect() : FunctionPass(ID) { + unsigned int SmVersion; + NVVMReflect() : NVVMReflect(0) {} + explicit NVVMReflect(unsigned int Sm) : FunctionPass(ID), SmVersion(Sm) { initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); } @@ -58,7 +60,9 @@ public: }; } -FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); } +FunctionPass *llvm::createNVVMReflectPass(unsigned int SmVersion) { + return new NVVMReflect(SmVersion); +} static cl::opt<bool> NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden, @@ -163,6 +167,8 @@ bool NVVMReflect::runOnFunction(Function &F) { if (auto *Flag = mdconst::extract_or_null<ConstantInt>( F.getParent()->getModuleFlag("nvvm-reflect-ftz"))) ReflectVal = Flag->getSExtValue(); + } else if (ReflectArg == "__CUDA_ARCH") { + ReflectVal = SmVersion * 10; } Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal)); ToRemove.push_back(Call); diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll new file mode 100644 index 00000000000..8e8d866504d --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch.ll @@ -0,0 +1,21 @@ +; Libdevice in recent CUDA versions relies on __CUDA_ARCH reflecting GPU type. +; Verify that __nvvm_reflect() is replaced with an appropriate value. 
+; +; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 \ +; RUN: | FileCheck %s --check-prefixes=COMMON,SM20 +; RUN: opt %s -S -nvvm-reflect -O2 -mtriple=nvptx64 -mcpu=sm_35 \ +; RUN: | FileCheck %s --check-prefixes=COMMON,SM35 + +@"$str" = private addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00" + +declare i32 @__nvvm_reflect(i8*) + +; COMMON-LABEL: @foo +define i32 @foo(float %a, float %b) { +; COMMON-NOT: call i32 @__nvvm_reflect + %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*)) +; SM20: ret i32 200 +; SM35: ret i32 350 + ret i32 %reflect +} + |

