summary | refs | log | tree | commit | diff | stats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
author: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2018-09-27 19:22:56 +0000
committer: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> 2018-09-27 19:22:56 +0000
commit: 02650d4c2cd060ab0b52cafafe3bdbdf71c704fd (patch)
tree: 599135279582174d0d67918ac7972b3b7618ac73 /clang/lib/CodeGen
parent: 67392feb49c044c3728b432582a135964386bcb1 (diff)
download: bcm5719-llvm-02650d4c2cd060ab0b52cafafe3bdbdf71c704fd.tar.gz
bcm5719-llvm-02650d4c2cd060ab0b52cafafe3bdbdf71c704fd.zip
[OpenMP] Make default distribute schedule for NVPTX target regions in SPMD mode achieve coalescing
Summary:
For the OpenMP NVPTX toolchain, choose a default distribute schedule that ensures coalescing on the GPU when in SPMD mode. This significantly increases the performance of offloaded target code and reduces the number of registers used on the GPU side.

Reviewers: ABataev, caomhin, Hahnfeld
Reviewed By: ABataev, Hahnfeld
Subscribers: Hahnfeld, jholewinski, guansong, cfe-commits
Differential Revision: https://reviews.llvm.org/D52434
llvm-svn: 343253
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntime.h 6
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 12
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h 5
-rw-r--r-- clang/lib/CodeGen/CGStmtOpenMP.cpp 4
4 files changed, 27 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index e0685d9bc66..982aeb3cf7a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -1490,6 +1490,12 @@ public:
const VarDecl *NativeParam,
const VarDecl *TargetParam) const;
+ /// Choose default schedule type and chunk value for the
+ /// dist_schedule clause. Both results are handed back through the reference parameters \p ScheduleKind and \p Chunk.
+ virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const {} // Base version has an empty body: ScheduleKind and Chunk keep the values the caller passed in.
+
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
virtual void
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 45aafaa5c3c..56b244d0ae8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4081,3 +4081,15 @@ void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
FunctionGlobalizedDecls.erase(CGF.CurFn);
CGOpenMPRuntime::functionFinished(CGF);
}
+
+void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk( // NVPTX default for dist_schedule; only acts when the execution mode is EM_SPMD.
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const { // ScheduleKind and Chunk are in/out references; both are left untouched outside SPMD mode.
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+ ScheduleKind = OMPC_DIST_SCHEDULE_static; // Use a static distribute schedule (per the commit summary, chosen so GPU accesses coalesce).
+ Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF), // Chunk is the value returned by getNVPTXNumThreads(CGF) ...
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), // ... treated as a 32-bit unsigned integer source type ...
+ S.getIterationVariable()->getType(), S.getBeginLoc()); // ... and converted to the type of the loop's iteration variable.
+ }
+}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 85ed838d473..76343dfc7f5 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -340,6 +340,11 @@ public:
///
void functionFinished(CodeGenFunction &CGF) override;
+ /// Choose a default schedule kind and chunk value for the dist_schedule clause.
+ void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+ const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+ llvm::Value *&Chunk) const override; // Results are written through ScheduleKind and Chunk.
+
private:
/// Track the execution mode when codegening directives within a target
/// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 7305b0f3213..4bafb8ba9fc 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3325,6 +3325,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
S.getIterationVariable()->getType(),
S.getBeginLoc());
}
+ } else {
+ // Default behaviour for dist_schedule clause.
+ CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
+ *this, S, ScheduleKind, Chunk);
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
OpenPOWER on IntegriCloud