summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp223
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h14
-rw-r--r--clang/test/OpenMP/nvptx_parallel_codegen.cpp1
-rw-r--r--clang/test/OpenMP/nvptx_target_teams_codegen.cpp41
-rw-r--r--clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp2
-rw-r--r--clang/test/OpenMP/target_parallel_debug_codegen.cpp8
6 files changed, 216 insertions, 73 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index cee5ca65fc6..98d8b0ff607 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -140,13 +140,15 @@ public:
/// to emit optimized code.
class ExecutionModeRAII {
private:
- bool SavedMode;
- bool &Mode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
+ CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
public:
- ExecutionModeRAII(bool &Mode, bool NewMode) : Mode(Mode) {
+ ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
+ : Mode(Mode) {
SavedMode = Mode;
- Mode = NewMode;
+ Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
+ : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
}
~ExecutionModeRAII() { Mode = SavedMode; }
};
@@ -579,8 +581,9 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
WorkerFn->setDoesNotRecurse();
}
-bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
- return IsInSPMDExecutionMode;
+CGOpenMPRuntimeNVPTX::ExecutionMode
+CGOpenMPRuntimeNVPTX::getExecutionMode() const {
+ return CurrentExecutionMode;
}
static CGOpenMPRuntimeNVPTX::DataSharingMode
@@ -589,34 +592,96 @@ getDataSharingMode(CodeGenModule &CGM) {
: CGOpenMPRuntimeNVPTX::Generic;
}
-/// Check for inner (nested) SPMD construct, if any
-static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
- const auto *CS = D.getCapturedStmt(OMPD_target);
- const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
- const Stmt *ChildStmt = nullptr;
+/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
+/// iff there is only one.
+static const Stmt *getSingleCompoundChild(const Stmt *Body) {
if (const auto *C = dyn_cast<CompoundStmt>(Body))
if (C->size() == 1)
- ChildStmt = C->body_front();
- if (!ChildStmt)
- return false;
+ return C->body_front();
+ return Body;
+}
+
+/// Check if the parallel directive has an 'if' clause with non-constant or
+/// false condition.
+static bool hasParallelIfClause(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
+ OpenMPDirectiveKind NameModifier = C->getNameModifier();
+ if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
+ continue;
+ const Expr *Cond = C->getCondition();
+ bool Result;
+ if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result)
+ return true;
+ }
+ return false;
+}
+
+/// Check for inner (nested) SPMD construct, if any
+static bool hasNestedSPMDDirective(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
+ const Stmt *ChildStmt = getSingleCompoundChild(Body);
if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
- // TODO: add further analysis for inner teams|distribute directives, if any.
switch (D.getDirectiveKind()) {
case OMPD_target:
- return (isOpenMPParallelDirective(DKind) &&
- !isOpenMPTeamsDirective(DKind) &&
- !isOpenMPDistributeDirective(DKind)) ||
- isOpenMPSimdDirective(DKind) ||
- DKind == OMPD_teams_distribute_parallel_for;
+ if ((isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfClause(Ctx, *NestedDir)) ||
+ isOpenMPSimdDirective(DKind))
+ return true;
+ if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if ((isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfClause(Ctx, *NND)) ||
+ isOpenMPSimdDirective(DKind))
+ return true;
+ if (DKind == OMPD_distribute) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (!ChildStmt)
+ return false;
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ return (isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfClause(Ctx, *NND)) ||
+ isOpenMPSimdDirective(DKind);
+ }
+ }
+ }
+ }
+ return false;
case OMPD_target_teams:
- return (isOpenMPParallelDirective(DKind) &&
- !isOpenMPDistributeDirective(DKind)) ||
- isOpenMPSimdDirective(DKind) ||
- DKind == OMPD_distribute_parallel_for;
+ if ((isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfClause(Ctx, *NestedDir)) ||
+ isOpenMPSimdDirective(DKind))
+ return true;
+ if (DKind == OMPD_distribute) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+ if (!Body)
+ return false;
+ ChildStmt = getSingleCompoundChild(Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ return (isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfClause(Ctx, *NND)) ||
+ isOpenMPSimdDirective(DKind);
+ }
+ }
+ return false;
case OMPD_target_teams_distribute:
- return isOpenMPParallelDirective(DKind) || isOpenMPSimdDirective(DKind);
+ return (isOpenMPParallelDirective(DKind) &&
+ !hasParallelIfClause(Ctx, *NestedDir)) ||
+ isOpenMPSimdDirective(DKind);
case OMPD_target_simd:
case OMPD_target_parallel:
case OMPD_target_parallel_for:
@@ -674,20 +739,22 @@ static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
return false;
}
-static bool supportsSPMDExecutionMode(const OMPExecutableDirective &D) {
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+ const OMPExecutableDirective &D) {
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
case OMPD_target_teams:
case OMPD_target_teams_distribute:
- return hasNestedSPMDDirective(D);
- case OMPD_target_simd:
+ return hasNestedSPMDDirective(Ctx, D);
case OMPD_target_parallel:
case OMPD_target_parallel_for:
case OMPD_target_parallel_for_simd:
- case OMPD_target_teams_distribute_simd:
case OMPD_target_teams_distribute_parallel_for:
case OMPD_target_teams_distribute_parallel_for_simd:
+ return !hasParallelIfClause(Ctx, D);
+ case OMPD_target_simd:
+ case OMPD_target_teams_distribute_simd:
return true;
case OMPD_parallel:
case OMPD_for:
@@ -744,7 +811,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/false);
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
EntryFunctionState EST;
WorkerFunctionState WST(CGM, D.getLocStart());
Work.clear();
@@ -858,7 +925,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen) {
- ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/true);
+ ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
EntryFunctionState EST;
// Emit target region as a standalone region.
@@ -905,11 +972,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
CGF.EmitBlock(ExecuteBB);
+ IsInTargetMasterThreadRegion = true;
emitGenericVarsProlog(CGF, D.getLocStart());
}
void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
EntryFunctionState &EST) {
+ IsInTargetMasterThreadRegion = false;
if (!CGF.HaveInsertPoint())
return;
@@ -1380,7 +1449,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
assert(!ParentName.empty() && "Invalid target region parent name!");
- bool Mode = supportsSPMDExecutionMode(D);
+ bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
if (Mode)
emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
CodeGen);
@@ -1401,8 +1470,8 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
OpenMPProcBindClauseKind ProcBind,
SourceLocation Loc) {
// Do nothing in case of Spmd mode and L0 parallel.
- // TODO: If in Spmd mode and L1 parallel emit the clause.
- if (isInSpmdExecutionMode())
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+ IsInTargetMasterThreadRegion)
return;
CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1412,8 +1481,8 @@ void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
// Do nothing in case of Spmd mode and L0 parallel.
- // TODO: If in Spmd mode and L1 parallel emit the clause.
- if (isInSpmdExecutionMode())
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+ IsInTargetMasterThreadRegion)
return;
CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1457,7 +1526,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
- if (!isInSpmdExecutionMode() && !IsInParallelRegion) {
+ if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
+ !IsInParallelRegion) {
llvm::Function *WrapperFun =
createParallelDataSharingWrapper(OutlinedFun, D);
WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1635,7 +1705,7 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
if (!CGF.HaveInsertPoint())
return;
- if (isInSpmdExecutionMode())
+ if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
else
emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
@@ -1759,6 +1829,8 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
SeqGen(CGF, Action);
} else if (IsInTargetMasterThreadRegion) {
L0ParallelGen(CGF, Action);
+ } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
+ RCG(CGF);
} else {
// Check for master and then parallelism:
// if (is_master) {
@@ -1770,20 +1842,18 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
// }
CGBuilderTy &Bld = CGF.Builder;
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
- if (!isInSpmdExecutionMode()) {
- llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
- llvm::BasicBlock *ParallelCheckBB =
- CGF.createBasicBlock(".parallelcheck");
- llvm::Value *IsMaster =
- Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
- Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
- CGF.EmitBlock(MasterCheckBB);
- L0ParallelGen(CGF, Action);
- CGF.EmitBranch(ExitBB);
- // There is no need to emit line number for unconditional branch.
- (void)ApplyDebugLocation::CreateEmpty(CGF);
- CGF.EmitBlock(ParallelCheckBB);
- }
+ llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+ llvm::BasicBlock *ParallelCheckBB =
+ CGF.createBasicBlock(".parallelcheck");
+ llvm::Value *IsMaster =
+ Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+ Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
+ CGF.EmitBlock(MasterCheckBB);
+ L0ParallelGen(CGF, Action);
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ParallelCheckBB);
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
llvm::Value *ThreadID = getThreadID(CGF, Loc);
llvm::Value *PL = CGF.EmitRuntimeCall(
@@ -1827,14 +1897,49 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
// is added on Spmd target directives.
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- Address ZeroAddr = CGF.CreateMemTemp(
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
- ".zero.addr");
+ Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/32, /*Signed=*/1),
+ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
- OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
- OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
- emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+ Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+ auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
+ ThreadIDAddr](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+ };
+ auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+
+ RegionCodeGenTy RCG(CodeGen);
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *Args[] = {RTLoc, ThreadID};
+
+ NVPTXActionTy Action(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+ Args,
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+ Args);
+ RCG.setAction(Action);
+ RCG(CGF);
+ };
+
+ if (IsInTargetMasterThreadRegion) {
+ RegionCodeGenTy RCG(CodeGen);
+ RCG(CGF);
+ } else {
+ // If we are not in the target region, it is definitely L2 parallelism or
+ // more, because for SPMD mode we always has L1 parallel level, sowe don't
+ // need to check for orphaned directives.
+ RegionCodeGenTy RCG(SeqGen);
+ RCG(CGF);
+ }
}
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 88420b90497..c7d647bfdb1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -24,6 +24,16 @@ namespace clang {
namespace CodeGen {
class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+public:
+ /// Defines the execution mode.
+ enum ExecutionMode {
+ /// SPMD execution mode (all threads are worker threads).
+ EM_SPMD,
+ /// Non-SPMD execution mode (1 master thread, others are workers).
+ EM_NonSPMD,
+ /// Unknown execution mode (orphaned directive).
+ EM_Unknown,
+ };
private:
/// Parallel outlined function work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
@@ -44,7 +54,7 @@ private:
void createWorkerFunction(CodeGenModule &CGM);
};
- bool isInSpmdExecutionMode() const;
+ ExecutionMode getExecutionMode() const;
/// Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
@@ -334,7 +344,7 @@ private:
/// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
/// target region and used by containing directives such as 'parallel'
/// to emit optimized code.
- bool IsInSPMDExecutionMode = false;
+ ExecutionMode CurrentExecutionMode = EM_Unknown;
/// true if we're emitting the code for the target region and next parallel
/// region is L0 for sure.
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index 071b08d8ad8..8f496eb80e4 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -58,6 +58,7 @@ tx ftemplate(int n) {
#pragma omp critical
++a;
}
+ ++a;
}
return a;
}
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index eec9507c250..aa054fffc4b 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -9,8 +9,9 @@
#define HEADER
// Check that the execution mode of all 2 target regions is set to Generic Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
template<typename tx>
tx ftemplate(int n) {
@@ -33,6 +34,13 @@ tx ftemplate(int n) {
aa = 1;
}
+ #pragma omp target teams
+ {
+#pragma omp parallel
+#pragma omp parallel
+ aa = 1;
+ }
+
return a;
}
@@ -44,14 +52,14 @@ int bar(int n){
return a;
}
- // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l21}}_worker()
+ // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l22}}_worker()
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l27}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -86,7 +94,7 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l26]](i[[SZ:32|64]] [[A:%[^)]+]])
+ // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l27]](i[[SZ:32|64]] [[A:%[^)]+]])
// CHECK: store i[[SZ]] [[A]], i[[SZ]]* [[A_ADDR:%.+]], align
// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i8*
@@ -137,7 +145,7 @@ int bar(int n){
- // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}_worker()
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l32}}_worker()
// CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
// CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
// CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -172,7 +180,7 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
- // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l31]](i[[SZ:32|64]] [[AA:%[^)]+]])
+ // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l32]](i[[SZ:32|64]] [[AA:%[^)]+]])
// CHECK: store i[[SZ]] [[AA]], i[[SZ]]* [[AA_ADDR:%.+]], align
// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
@@ -218,5 +226,24 @@ int bar(int n){
// CHECK: [[EXIT]]
// CHECK: ret void
+// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
+// CHECK: call void @__kmpc_spmd_kernel_init(
+// CHECK: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK-NOT: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
+// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_data_sharing_pop_stack(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret
+
+// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L1:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.+}})
+// CHECK: call void @__kmpc_end_serialized_parallel(
+// CHECK: ret void
+
+// CHECK: define internal void [[L1]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: store i16 1, i16* %
+// CHECK: ret void
#endif
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index b9bd9fe28a0..2e712e2d51d 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -13,7 +13,7 @@ int a;
int foo(int *a);
int main(int argc, char **argv) {
-#pragma omp target teams distribute parallel for map(tofrom:a) if(parallel:argc)
+#pragma omp target teams distribute parallel for map(tofrom:a) if(target:argc) schedule(static, a)
for (int i= 0; i < argc; ++i)
a = foo(&i) + foo(&a) + foo(&argc);
return 0;
diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
index aab9f50d87b..ebd761fb969 100644
--- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=45 | FileCheck %s
// expected-no-diagnostics
int main() {
@@ -11,7 +11,7 @@ int main() {
int c[10][10][10];
#pragma omp target parallel firstprivate(a, b) map(tofrom \
: c) map(tofrom \
- : bb) if (a)
+ : bb) if (target:a)
{
int &f = c[1][1][1];
int &g = a;
@@ -54,7 +54,7 @@ int main() {
return 0;
}
-// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}})
+// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}})
// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
OpenPOWER on IntegriCloud