diff options
author | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2018-09-27 19:22:56 +0000 |
---|---|---|
committer | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2018-09-27 19:22:56 +0000 |
commit | 02650d4c2cd060ab0b52cafafe3bdbdf71c704fd (patch) | |
tree | 599135279582174d0d67918ac7972b3b7618ac73 /clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | |
parent | 67392feb49c044c3728b432582a135964386bcb1 (diff) | |
download | bcm5719-llvm-02650d4c2cd060ab0b52cafafe3bdbdf71c704fd.tar.gz bcm5719-llvm-02650d4c2cd060ab0b52cafafe3bdbdf71c704fd.zip |
[OpenMP] Make default distribute schedule for NVPTX target regions in SPMD mode achieve coalescing
Summary: For the OpenMP NVPTX toolchain choose a default distribute schedule that ensures coalescing on the GPU when in SPMD mode. This significantly increases the performance of offloaded target code and reduces the number of registers used on the GPU side.
Reviewers: ABataev, caomhin, Hahnfeld
Reviewed By: ABataev, Hahnfeld
Subscribers: Hahnfeld, jholewinski, guansong, cfe-commits
Differential Revision: https://reviews.llvm.org/D52434
llvm-svn: 343253
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 45aafaa5c3c..56b244d0ae8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4081,3 +4081,15 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) { FunctionGlobalizedDecls.erase(CGF.CurFn); CGOpenMPRuntime::functionFinished(CGF); } + +void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk( + CodeGenFunction &CGF, const OMPLoopDirective &S, + OpenMPDistScheduleClauseKind &ScheduleKind, + llvm::Value *&Chunk) const { + if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { + ScheduleKind = OMPC_DIST_SCHEDULE_static; + Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), + CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + S.getIterationVariable()->getType(), S.getBeginLoc()); + } +} |