6 files changed, 216 insertions, 73 deletions
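The heart of the change is a stricter, recursive check for whether a target region can be emitted as an SPMD kernel: a nested parallel directive only qualifies if it carries no 'if' clause that might evaluate to false at run time. A minimal illustration of the two cases, written against the patched hasNestedSPMDDirective()/hasParallelIfClause() logic; the function and variable names below are placeholders, not part of the patch:

void scale(int n, const int *in, int *out, int use_threads) {
  // Nested parallel worksharing with no 'if' clause: hasNestedSPMDDirective()
  // accepts it, so the whole target region is emitted as an SPMD kernel.
#pragma omp target teams map(to : in[0:n]) map(from : out[0:n])
#pragma omp distribute parallel for
  for (int i = 0; i < n; ++i)
    out[i] = 2 * in[i];

  // The same construct with an 'if' clause that may be false at run time:
  // hasParallelIfClause() flags it, so the kernel stays in generic
  // (non-SPMD) mode and keeps the master/worker scheme.
#pragma omp target teams map(to : in[0:n]) map(from : out[0:n])
#pragma omp distribute parallel for if(parallel : use_threads)
  for (int i = 0; i < n; ++i)
    out[i] = 2 * in[i];
}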
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index cee5ca65fc6..98d8b0ff607 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -140,13 +140,15 @@ public:
 /// to emit optimized code.
 class ExecutionModeRAII {
 private:
-  bool SavedMode;
-  bool &Mode;
+  CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
+  CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
 
 public:
-  ExecutionModeRAII(bool &Mode, bool NewMode) : Mode(Mode) {
+  ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
+      : Mode(Mode) {
     SavedMode = Mode;
-    Mode = NewMode;
+    Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
+                  : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
   }
   ~ExecutionModeRAII() { Mode = SavedMode; }
 };
@@ -579,8 +581,9 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction(
   WorkerFn->setDoesNotRecurse();
 }
 
-bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
-  return IsInSPMDExecutionMode;
+CGOpenMPRuntimeNVPTX::ExecutionMode
+CGOpenMPRuntimeNVPTX::getExecutionMode() const {
+  return CurrentExecutionMode;
 }
 
 static CGOpenMPRuntimeNVPTX::DataSharingMode
@@ -589,34 +592,96 @@ getDataSharingMode(CodeGenModule &CGM) {
                                           : CGOpenMPRuntimeNVPTX::Generic;
 }
 
-/// Check for inner (nested) SPMD construct, if any
-static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
-  const auto *CS = D.getCapturedStmt(OMPD_target);
-  const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
-  const Stmt *ChildStmt = nullptr;
+/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
+/// iff there is only one.
+static const Stmt *getSingleCompoundChild(const Stmt *Body) {
   if (const auto *C = dyn_cast<CompoundStmt>(Body))
     if (C->size() == 1)
-      ChildStmt = C->body_front();
-  if (!ChildStmt)
-    return false;
+      return C->body_front();
+  return Body;
+}
+
+/// Check if the parallel directive has an 'if' clause with non-constant or
+/// false condition.
+static bool hasParallelIfClause(ASTContext &Ctx,
+                                const OMPExecutableDirective &D) {
+  for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
+    OpenMPDirectiveKind NameModifier = C->getNameModifier();
+    if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
+      continue;
+    const Expr *Cond = C->getCondition();
+    bool Result;
+    if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result)
+      return true;
+  }
+  return false;
+}
+
+/// Check for inner (nested) SPMD construct, if any
+static bool hasNestedSPMDDirective(ASTContext &Ctx,
+                                   const OMPExecutableDirective &D) {
+  const auto *CS = D.getInnermostCapturedStmt();
+  const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
+  const Stmt *ChildStmt = getSingleCompoundChild(Body);
 
   if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    // TODO: add further analysis for inner teams|distribute directives, if any.
     switch (D.getDirectiveKind()) {
     case OMPD_target:
-      return (isOpenMPParallelDirective(DKind) &&
-              !isOpenMPTeamsDirective(DKind) &&
-              !isOpenMPDistributeDirective(DKind)) ||
-             isOpenMPSimdDirective(DKind) ||
-             DKind == OMPD_teams_distribute_parallel_for;
+      if ((isOpenMPParallelDirective(DKind) &&
+           !hasParallelIfClause(Ctx, *NestedDir)) ||
+          isOpenMPSimdDirective(DKind))
+        return true;
+      if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+        if (!Body)
+          return false;
+        ChildStmt = getSingleCompoundChild(Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if ((isOpenMPParallelDirective(DKind) &&
+               !hasParallelIfClause(Ctx, *NND)) ||
+              isOpenMPSimdDirective(DKind))
+            return true;
+          if (DKind == OMPD_distribute) {
+            Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+            if (!Body)
+              return false;
+            ChildStmt = getSingleCompoundChild(Body);
+            if (!ChildStmt)
+              return false;
+            if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+              DKind = NND->getDirectiveKind();
+              return (isOpenMPParallelDirective(DKind) &&
+                      !hasParallelIfClause(Ctx, *NND)) ||
+                     isOpenMPSimdDirective(DKind);
+            }
+          }
+        }
+      }
+      return false;
     case OMPD_target_teams:
-      return (isOpenMPParallelDirective(DKind) &&
-              !isOpenMPDistributeDirective(DKind)) ||
-             isOpenMPSimdDirective(DKind) ||
-             DKind == OMPD_distribute_parallel_for;
+      if ((isOpenMPParallelDirective(DKind) &&
+           !hasParallelIfClause(Ctx, *NestedDir)) ||
+          isOpenMPSimdDirective(DKind))
+        return true;
+      if (DKind == OMPD_distribute) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+        if (!Body)
+          return false;
+        ChildStmt = getSingleCompoundChild(Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          return (isOpenMPParallelDirective(DKind) &&
+                  !hasParallelIfClause(Ctx, *NND)) ||
+                 isOpenMPSimdDirective(DKind);
+        }
+      }
+      return false;
     case OMPD_target_teams_distribute:
-      return isOpenMPParallelDirective(DKind) || isOpenMPSimdDirective(DKind);
+      return (isOpenMPParallelDirective(DKind) &&
+              !hasParallelIfClause(Ctx, *NestedDir)) ||
+             isOpenMPSimdDirective(DKind);
     case OMPD_target_simd:
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:
@@ -674,20 +739,22 @@ static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
   return false;
 }
 
-static bool supportsSPMDExecutionMode(const OMPExecutableDirective &D) {
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+                                      const OMPExecutableDirective &D) {
   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
   switch (DirectiveKind) {
   case OMPD_target:
   case OMPD_target_teams:
   case OMPD_target_teams_distribute:
-    return hasNestedSPMDDirective(D);
-  case OMPD_target_simd:
+    return hasNestedSPMDDirective(Ctx, D);
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
   case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_simd:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd:
+    return !hasParallelIfClause(Ctx, D);
+  case OMPD_target_simd:
+  case OMPD_target_teams_distribute_simd:
     return true;
   case OMPD_parallel:
   case OMPD_for:
@@ -744,7 +811,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D,
                                              llvm::Constant *&OutlinedFnID,
                                              bool IsOffloadEntry,
                                              const RegionCodeGenTy &CodeGen) {
-  ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/false);
+  ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
   EntryFunctionState EST;
   WorkerFunctionState WST(CGM, D.getLocStart());
   Work.clear();
@@ -858,7 +925,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D,
                                           llvm::Constant *&OutlinedFnID,
                                           bool IsOffloadEntry,
                                           const RegionCodeGenTy &CodeGen) {
-  ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/true);
+  ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
   EntryFunctionState EST;
 
   // Emit target region as a standalone region.
@@ -905,11 +972,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader(
 
   CGF.EmitBlock(ExecuteBB);
 
+  IsInTargetMasterThreadRegion = true;
   emitGenericVarsProlog(CGF, D.getLocStart());
 }
 
 void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
                                                EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
   if (!CGF.HaveInsertPoint())
     return;
 
@@ -1380,7 +1449,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction(
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
-  bool Mode = supportsSPMDExecutionMode(D);
+  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
   if (Mode)
     emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
@@ -1401,8 +1470,8 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
                                               OpenMPProcBindClauseKind ProcBind,
                                               SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  // TODO: If in Spmd mode and L1 parallel emit the clause.
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      IsInTargetMasterThreadRegion)
     return;
 
   CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1412,8 +1481,8 @@ void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
                                                 llvm::Value *NumThreads,
                                                 SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  // TODO: If in Spmd mode and L1 parallel emit the clause.
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      IsInTargetMasterThreadRegion)
     return;
 
   CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1457,7 +1526,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
       cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
           D, ThreadIDVar, InnermostKind, CodeGen));
   IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
-  if (!isInSpmdExecutionMode() && !IsInParallelRegion) {
+  if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      !IsInParallelRegion) {
     llvm::Function *WrapperFun =
         createParallelDataSharingWrapper(OutlinedFun, D);
     WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1635,7 +1705,7 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall(
   if (!CGF.HaveInsertPoint())
     return;
 
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
   else
     emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
@@ -1759,6 +1829,8 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
       SeqGen(CGF, Action);
     } else if (IsInTargetMasterThreadRegion) {
       L0ParallelGen(CGF, Action);
+    } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
+      RCG(CGF);
     } else {
       // Check for master and then parallelism:
       // if (is_master) {
@@ -1770,20 +1842,18 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
       // }
      CGBuilderTy &Bld = CGF.Builder;
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-      if (!isInSpmdExecutionMode()) {
-        llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
-        llvm::BasicBlock *ParallelCheckBB =
-            CGF.createBasicBlock(".parallelcheck");
-        llvm::Value *IsMaster =
-            Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
-        Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
-        CGF.EmitBlock(MasterCheckBB);
-        L0ParallelGen(CGF, Action);
-        CGF.EmitBranch(ExitBB);
-        // There is no need to emit line number for unconditional branch.
-        (void)ApplyDebugLocation::CreateEmpty(CGF);
-        CGF.EmitBlock(ParallelCheckBB);
-      }
+      llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+      llvm::BasicBlock *ParallelCheckBB =
+          CGF.createBasicBlock(".parallelcheck");
+      llvm::Value *IsMaster =
+          Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+      Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
+      CGF.EmitBlock(MasterCheckBB);
+      L0ParallelGen(CGF, Action);
+      CGF.EmitBranch(ExitBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(ParallelCheckBB);
      llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
      llvm::Value *ThreadID = getThreadID(CGF, Loc);
      llvm::Value *PL = CGF.EmitRuntimeCall(
@@ -1827,14 +1897,49 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
   // is added on Spmd target directives.
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      ".zero.addr");
+  Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+                                           /*DestWidth=*/32, /*Signed=*/1),
+                                       ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
+                    ThreadIDAddr](CodeGenFunction &CGF,
+                                  PrePostActionTy &Action) {
+    Action.Enter(CGF);
+
+    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+    emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  };
+  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+                                        PrePostActionTy &) {
+
+    RegionCodeGenTy RCG(CodeGen);
+    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *ThreadID = getThreadID(CGF, Loc);
+    llvm::Value *Args[] = {RTLoc, ThreadID};
+
+    NVPTXActionTy Action(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        Args,
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        Args);
+    RCG.setAction(Action);
+    RCG(CGF);
+  };
+
+  if (IsInTargetMasterThreadRegion) {
+    RegionCodeGenTy RCG(CodeGen);
+    RCG(CGF);
+  } else {
+    // If we are not in the target region, it is definitely L2 parallelism or
+    // more, because in SPMD mode we always have an L1 parallel level, so we
+    // don't need to check for orphaned directives.
+    RegionCodeGenTy RCG(SeqGen);
+    RCG(CGF);
+  }
 }
 
 void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 88420b90497..c7d647bfdb1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -24,6 +24,16 @@ namespace clang {
 namespace CodeGen {
 
 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+public:
+  /// Defines the execution mode.
+  enum ExecutionMode {
+    /// SPMD execution mode (all threads are worker threads).
+    EM_SPMD,
+    /// Non-SPMD execution mode (1 master thread, others are workers).
+    EM_NonSPMD,
+    /// Unknown execution mode (orphaned directive).
+    EM_Unknown,
+  };
 private:
   /// Parallel outlined function work for workers to execute.
   llvm::SmallVector<llvm::Function *, 16> Work;
@@ -44,7 +54,7 @@ private:
     void createWorkerFunction(CodeGenModule &CGM);
   };
 
-  bool isInSpmdExecutionMode() const;
+  ExecutionMode getExecutionMode() const;
 
   /// Emit the worker function for the current target region.
   void emitWorkerFunction(WorkerFunctionState &WST);
@@ -334,7 +344,7 @@ private:
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
   /// target region and used by containing directives such as 'parallel'
   /// to emit optimized code.
-  bool IsInSPMDExecutionMode = false;
+  ExecutionMode CurrentExecutionMode = EM_Unknown;
 
   /// true if we're emitting the code for the target region and next parallel
   /// region is L0 for sure.
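The test updates that follow switch several 'if' clauses from the parallel form to the target form. On a combined construct the directive name modifier selects which constituent directive the condition governs, and only the parallel form feeds into hasParallelIfClause(). A hedged sketch of the difference; the function and variable names are illustrative only, not taken from the tests:

void bump(int n, int *a, int run_on_device, int use_threads) {
  // 'if(target: ...)': a false condition only disables offloading and falls
  // back to the host; the device kernel can still be emitted in SPMD mode.
#pragma omp target teams distribute parallel for map(tofrom : a[0:n]) \
    if(target : run_on_device)
  for (int i = 0; i < n; ++i)
    a[i] += 1;

  // 'if(parallel: ...)': a false condition serializes the parallel region,
  // so the patched supportsSPMDExecutionMode() no longer picks SPMD here.
#pragma omp target teams distribute parallel for map(tofrom : a[0:n]) \
    if(parallel : use_threads)
  for (int i = 0; i < n; ++i)
    a[i] += 1;
}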
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index 071b08d8ad8..8f496eb80e4 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -58,6 +58,7 @@ tx ftemplate(int n) {
 #pragma omp critical
       ++a;
     }
+    ++a;
   }
   return a;
 }
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index eec9507c250..aa054fffc4b 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -9,8 +9,9 @@
 #define HEADER
 
 // Check that the execution mode of all 2 target regions is set to Generic Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
 
 template<typename tx>
 tx ftemplate(int n) {
@@ -33,6 +34,13 @@ tx ftemplate(int n) {
     aa = 1;
   }
 
+  #pragma omp target teams
+  {
+#pragma omp parallel
+#pragma omp parallel
+    aa = 1;
+  }
+
   return a;
 }
 
@@ -44,14 +52,14 @@ int bar(int n){
   return a;
 }
 
-  // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l21}}_worker()
+  // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l22}}_worker()
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l27}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -86,7 +94,7 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l26]](i[[SZ:32|64]] [[A:%[^)]+]])
+  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l27]](i[[SZ:32|64]] [[A:%[^)]+]])
   // CHECK: store i[[SZ]] [[A]], i[[SZ]]* [[A_ADDR:%.+]], align
   // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i8*
@@ -137,7 +145,7 @@ int bar(int n){
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l32}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -172,7 +180,7 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l31]](i[[SZ:32|64]] [[AA:%[^)]+]])
+  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l32]](i[[SZ:32|64]] [[AA:%[^)]+]])
   // CHECK: store i[[SZ]] [[AA]], i[[SZ]]* [[AA_ADDR:%.+]], align
   // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
@@ -218,5 +226,24 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
+// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
+// CHECK: call void @__kmpc_spmd_kernel_init(
+// CHECK: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK-NOT: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
+// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_data_sharing_pop_stack(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret
+
+// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L1:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.+}})
+// CHECK: call void @__kmpc_end_serialized_parallel(
+// CHECK: ret void
+
+// CHECK: define internal void [[L1]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: store i16 1, i16* %
+// CHECK: ret void
 #endif
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index b9bd9fe28a0..2e712e2d51d 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -13,7 +13,7 @@ int a;
 int foo(int *a);
 
 int main(int argc, char **argv) {
-#pragma omp target teams distribute parallel for map(tofrom:a) if(parallel:argc)
+#pragma omp target teams distribute parallel for map(tofrom:a) if(target:argc) schedule(static, a)
   for (int i= 0; i < argc; ++i)
     a = foo(&i) + foo(&a) + foo(&argc);
   return 0;
diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
index aab9f50d87b..ebd761fb969 100644
--- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=45 | FileCheck %s
 // expected-no-diagnostics
 
 int main() {
@@ -11,7 +11,7 @@ int main() {
   int c[10][10][10];
 #pragma omp target parallel firstprivate(a, b) map(tofrom \
                                                    : c) map(tofrom \
-                                                            : bb) if (a)
+                                                            : bb) if (target:a)
   {
     int &f = c[1][1][1];
     int &g = a;
@@ -54,7 +54,7 @@ int main() {
   return 0;
 }
 
-// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}})
+// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}})
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
 // CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})
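To tie the new CHECK lines in nvptx_target_teams_codegen.cpp back to the source: the l37 kernel is the target teams region added to ftemplate, which now runs in SPMD mode (exec_mode i8 0) even though it contains nested parallelism. The sketch below is my reading of those CHECK lines, with a hypothetical map clause added so it stands alone; it is not the test source verbatim:

short spmd_nested(short aa) {
#pragma omp target teams map(tofrom : aa)  // SPMD kernel, exec_mode == 0
  {
#pragma omp parallel  // level-1 parallel: outlined as [[L0]] and called
                      // directly, no __kmpc_serialized_parallel around it
#pragma omp parallel  // level-2 parallel: outlined as [[L1]] and bracketed by
                      // __kmpc_serialized_parallel/__kmpc_end_serialized_parallel,
                      // i.e. executed sequentially by each level-1 thread
    aa = 1;           // [[L1]] is the function that stores i16 1
  }
  return aa;
}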