summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
diff options
context:
space:
mode:
author: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2018-09-27 19:22:56 +0000
committer: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2018-09-27 19:22:56 +0000
commit: 02650d4c2cd060ab0b52cafafe3bdbdf71c704fd (patch)
tree: 599135279582174d0d67918ac7972b3b7618ac73 /clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
parent: 67392feb49c044c3728b432582a135964386bcb1 (diff)
downloadbcm5719-llvm-02650d4c2cd060ab0b52cafafe3bdbdf71c704fd.tar.gz
bcm5719-llvm-02650d4c2cd060ab0b52cafafe3bdbdf71c704fd.zip
[OpenMP] Make default distribute schedule for NVPTX target regions in SPMD mode achieve coalescing
Summary:
For the OpenMP NVPTX toolchain, choose a default distribute schedule that ensures coalescing on the GPU when in SPMD mode. This significantly increases the performance of offloaded target code and reduces the number of registers used on the GPU side.

Reviewers: ABataev, caomhin, Hahnfeld

Reviewed By: ABataev, Hahnfeld

Subscribers: Hahnfeld, jholewinski, guansong, cfe-commits

Differential Revision: https://reviews.llvm.org/D52434

llvm-svn: 343253
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 12
1 files changed, 12 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 45aafaa5c3c..56b244d0ae8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4081,3 +4081,15 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
FunctionGlobalizedDecls.erase(CGF.CurFn);
CGOpenMPRuntime::functionFinished(CGF);
}
+
+void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk( // Chooses the default distribute schedule kind and chunk for loop directive S on NVPTX.
+ CodeGenFunction &CGF, const OMPLoopDirective &S, // CGF: used to emit the conversion below; S: supplies the iteration variable type and begin location.
+ OpenMPDistScheduleClauseKind &ScheduleKind, // In/out: overwritten only when the execution mode is EM_SPMD.
+ llvm::Value *&Chunk) const { // In/out: overwritten only when the execution mode is EM_SPMD.
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) { // In any other execution mode both outputs are left exactly as the caller passed them.
+ ScheduleKind = OMPC_DIST_SCHEDULE_static; // Static distribute schedule; intended to give coalescing on the GPU in SPMD mode.
+ Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), // Chunk is the value returned by getNVPTXNumThreads(CGF) ...
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), // ... treated as a 32-bit unsigned integer ...
+ S.getIterationVariable()->getType(), S.getBeginLoc()); // ... and converted to the type of the loop's iteration variable.
+ }
+}
OpenPOWER on IntegriCloud