diff options
| -rw-r--r-- | clang/include/clang/Basic/Attr.td | 8 | ||||
| -rw-r--r-- | clang/include/clang/Sema/Sema.h | 5 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 26 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 78 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 25 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 69 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaExpr.cpp | 2 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaOpenMP.cpp | 33 | ||||
| -rw-r--r-- | clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp | 70 | ||||
| -rw-r--r-- | clang/test/OpenMP/target_parallel_debug_codegen.cpp | 97 |
10 files changed, 359 insertions, 54 deletions
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 3de9e8e8ddd..431905ece40 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2685,6 +2685,14 @@ def OMPCaptureNoInit : InheritableAttr { let Documentation = [Undocumented]; } +def OMPCaptureKind : Attr { + // This attribute has no spellings as it is only ever created implicitly. + let Spellings = []; + let SemaHandler = 0; + let Args = [UnsignedArgument<"CaptureKind">]; + let Documentation = [Undocumented]; +} + def OMPDeclareSimdDecl : Attr { let Spellings = [Pragma<"omp", "declare simd">]; let Subjects = SubjectList<[Function]>; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e804a7962a7..30fb56e338e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -8527,6 +8527,11 @@ public: /// is performed. bool isOpenMPPrivateDecl(ValueDecl *D, unsigned Level); + /// Sets OpenMP capture kind (OMPC_private, OMPC_firstprivate, OMPC_map etc.) + /// for \p FD based on DSA for the provided corresponding captured declaration + /// \p D. + void setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level); + /// \brief Check if the specified variable is captured by 'target' directive. /// \param Level Relative level of nested OpenMP construct for that the check /// is performed. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 809d9fa0a92..4f75e250cb9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1325,6 +1325,32 @@ public: virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C); + /// Translates the native parameter of outlined function if this is required + /// for target. + /// \param FD Field decl from captured record for the paramater. + /// \param NativeParam Parameter itself. + virtual const VarDecl *translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + return NativeParam; + } + + typedef llvm::function_ref<void(CodeGenFunction &, const VarDecl *, Address)> + MappingFnType; + /// Maps the native argument to the address of the corresponding + /// target-specific argument. + /// \param FD Field decl from captured record for the paramater. + /// \param NativeParam Parameter itself. + /// \param TargetParam Corresponding target-specific parameter. + /// \param MapFn Function that maps the native parameter to the address of the + /// target-specific. + virtual void mapParameterAddress(CodeGenFunction &CGF, const FieldDecl *FD, + const VarDecl *NativeParam, + const VarDecl *TargetParam, + const MappingFnType) const { + assert(NativeParam == TargetParam && + "native and target args must be the same"); + } + /// Emits call of the outlined function with the provided arguments, /// translating these arguments to correct target-specific arguments. virtual void diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index d6a82577965..c2eb83de9a2 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -2238,3 +2238,81 @@ void CGOpenMPRuntimeNVPTX::emitReduction( CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); } + +const VarDecl * +CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + if (!NativeParam->getType()->isReferenceType()) + return NativeParam; + QualType ArgType = NativeParam->getType(); + QualifierCollector QC; + const Type *NonQualTy = QC.strip(ArgType); + QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); + if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) { + if (Attr->getCaptureKind() == OMPC_map) { + PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy, + LangAS::opencl_global); + } + } + ArgType = CGM.getContext().getPointerType(PointeeTy); + QC.addRestrict(); + enum { NVPTX_local_addr = 5 }; + QC.addAddressSpace(NVPTX_local_addr); + ArgType = QC.apply(CGM.getContext(), ArgType); + return ImplicitParamDecl::Create( + CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(), + NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other); +} + +void CGOpenMPRuntimeNVPTX::mapParameterAddress( + CodeGenFunction &CGF, const FieldDecl *FD, const VarDecl *NativeParam, + const VarDecl *TargetParam, + const CGOpenMPRuntime::MappingFnType MapFn) const { + assert(NativeParam != TargetParam && + NativeParam->getType()->isReferenceType() && + "Native arg must not be the same as target arg."); + Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam); + QualType NativeParamType = NativeParam->getType(); + QualifierCollector QC; + const Type *NonQualTy = QC.strip(NativeParamType); + QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType(); + unsigned NativePointeeAddrSpace = + NativePointeeTy.getQualifiers().getAddressSpace(); + QualType TargetPointeeTy = TargetParam->getType()->getPointeeType(); + llvm::Value *TargetAddr = CGF.EmitLoadOfScalar( + LocalAddr, /*Volatile=*/false, TargetPointeeTy, SourceLocation()); + // First cast to generic. + TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo( + /*AddrSpace=*/0)); + // Cast from generic to native address space. + TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo( + NativePointeeAddrSpace)); + Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType); + CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false, + NativeParam->getType()); + MapFn(CGF, NativeParam, NativeParamAddr); +} + +void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall( + CodeGenFunction &CGF, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args) const { + SmallVector<llvm::Value *, 4> TargetArgs; + auto *FnType = + cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType()); + for (unsigned I = 0, E = Args.size(); I < E; ++I) { + llvm::Type *TargetType = FnType->getParamType(I); + llvm::Value *NativeArg = Args[I]; + if (!TargetType->isPointerTy()) { + TargetArgs.emplace_back(NativeArg); + continue; + } + llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo( + /*AddrSpace=*/0)); + TargetArgs.emplace_back( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType)); + } + CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, OutlinedFn, TargetArgs); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index ae25e94759e..ac0e0373418 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -268,6 +268,31 @@ public: /// \return Specified function. llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); + /// Translates the native parameter of outlined function if this is required + /// for target. + /// \param FD Field decl from captured record for the paramater. + /// \param NativeParam Parameter itself. + const VarDecl *translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const override; + + /// Maps the native argument to the address of the corresponding + /// target-specific argument. + /// \param FD Field decl from captured record for the paramater. + /// \param NativeParam Parameter itself. + /// \param TargetParam Corresponding target-specific parameter. + /// \param MapFn Function that maps the native parameter to the address of the + /// target-specific. + void mapParameterAddress(CodeGenFunction &CGF, const FieldDecl *FD, + const VarDecl *NativeParam, + const VarDecl *TargetParam, + const MappingFnType MapFn) const override; + + /// Emits call of the outlined function with the provided arguments, + /// translating these arguments to correct target-specific arguments. + void emitOutlinedFunctionCall( + CodeGenFunction &CGF, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> Args = llvm::None) const override; + /// Target codegen is specialized based on two programming models: the /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd' /// model for constructs like 'target parallel' that support it. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 436a04fbeb1..a62277e8fe3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -246,24 +246,27 @@ namespace { const CapturedStmt *S = nullptr; /// true if cast to/from UIntPtr is required for variables captured by /// value. - bool UIntPtrCastRequired = true; + const bool UIntPtrCastRequired = true; /// true if only casted argumefnts must be registered as local args or VLA /// sizes. - bool RegisterCastedArgsOnly = false; + const bool RegisterCastedArgsOnly = false; /// Name of the generated function. - StringRef FunctionName; + const StringRef FunctionName; + /// Function that maps given variable declaration to the specified address. + const CGOpenMPRuntime::MappingFnType MapFn; explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, bool RegisterCastedArgsOnly, - StringRef FunctionName) + StringRef FunctionName, + const CGOpenMPRuntime::MappingFnType MapFn) : S(S), UIntPtrCastRequired(UIntPtrCastRequired), RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), - FunctionName(FunctionName) {} + FunctionName(FunctionName), MapFn(MapFn) {} }; } static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, - llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> &LocalAddrs, llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> &VLASizes, @@ -276,9 +279,13 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( // Build the argument list. CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); + FunctionArgList TargetArgs; bool HasUIntPtrArgs = false; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); + TargetArgs.append( + CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); auto I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); @@ -308,19 +315,28 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( } if (ArgType->isVariablyModifiedType()) ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); - Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, - FD->getLocation(), II, ArgType, - ImplicitParamDecl::Other)); + auto *Arg = + ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II, + ArgType, ImplicitParamDecl::Other); + Args.emplace_back(Arg); + // Do not cast arguments if we emit function with non-original types. + TargetArgs.emplace_back( + FO.UIntPtrCastRequired + ? Arg + : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } Args.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); + TargetArgs.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = @@ -331,16 +347,21 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( F->setDoesNotThrow(); // Generate the function. - CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, CD->getLocation(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { + // Do not map arguments if we emit function with non-original types. + if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { + CGM.getOpenMPRuntime().mapParameterAddress(CGF, FD, Args[Cnt], + TargetArgs[Cnt], FO.MapFn); + } + Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. if (CurVD->getType()->isReferenceType()) { Address RefAddr = CGF.CreateMemTemp( @@ -357,8 +378,8 @@ static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( } LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), - Args[Cnt]->getType(), BaseInfo); + LValue ArgLVal = + CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo); if (FD->hasCapturedVLAType()) { if (FO.UIntPtrCastRequired) { ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), @@ -426,10 +447,19 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { getDebugInfo() && CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; FunctionArgList Args; - llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; - FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - CapturedStmtInfo->getHelperName()); + FunctionOptions FO( + &S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, + CapturedStmtInfo->getHelperName(), + [NeedWrapperFunction](CodeGenFunction &CGF, const VarDecl *VD, + Address Addr) { + if (!NeedWrapperFunction) { + llvm_unreachable("Function should not be called if wrapper function " + "is not required."); + } + CGF.setAddrOfLocalVar(VD, Addr); + }); llvm::Function *F; bool HasUIntPtrArgs; std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( @@ -452,7 +482,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { llvm::raw_svector_ostream Out(Buffer); Out << "__nondebug_wrapper_" << CapturedStmtInfo->getHelperName(); FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, - /*RegisterCastedArgsOnly=*/true, Out.str()); + /*RegisterCastedArgsOnly=*/true, Out.str(), + [](CodeGenFunction &, const VarDecl *, Address) { + llvm_unreachable("Function should not be called"); + }); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); WrapperCGF.disableDebugInfo(); Args.clear(); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 28b94191115..e482337df23 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -14013,6 +14013,8 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); + if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) + S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel); CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 72a7daa919c..8394a515319 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1327,6 +1327,39 @@ bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) { DSAStack->isTaskgroupReductionRef(D, Level)); } +void Sema::setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level) { + assert(LangOpts.OpenMP && "OpenMP is not allowed"); + D = getCanonicalDecl(D); + OpenMPClauseKind OMPC = OMPC_unknown; + for (unsigned I = DSAStack->getNestingLevel() + 1; I > Level; --I) { + const unsigned NewLevel = I - 1; + if (DSAStack->hasExplicitDSA(D, + [&OMPC](const OpenMPClauseKind K) { + if (isOpenMPPrivate(K)) { + OMPC = K; + return true; + } + return false; + }, + NewLevel)) + break; + if (DSAStack->checkMappableExprComponentListsForDeclAtLevel( + D, NewLevel, + [](OMPClauseMappableExprCommon::MappableExprComponentListRef, + OpenMPClauseKind) { return true; })) { + OMPC = OMPC_map; + break; + } + if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective, + NewLevel)) { + OMPC = OMPC_firstprivate; + break; + } + } + if (OMPC != OMPC_unknown) + FD->addAttr(OMPCaptureKindAttr::CreateImplicit(Context, OMPC)); +} + bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) { assert(LangOpts.OpenMP && "OpenMP is not allowed"); // Return true if the current level is no longer enclosed in a target region. diff --git a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp index 5dcff8e5484..c4e242ee88d 100644 --- a/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -1,15 +1,14 @@ - // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER -template<typename tx, typename ty> -struct TT{ +template <typename tx, typename ty> +struct TT { tx X; ty Y; }; @@ -23,29 +22,32 @@ int foo(int n, double *ptr) { float b[10]; double c[5][10]; TT<long long, char> d; - - #pragma omp target firstprivate(a) + +#pragma omp target firstprivate(a) map(tofrom \ + : b) { + b[a] = a; } - - // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]]) + + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}([10 x float] addrspace(1)* noalias [[B_IN:%.+]], i{{[0-9]+}} [[A_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, + // TCHECK-64: call void @llvm.dbg.declare(metadata [10 x float] addrspace(1)** %{{.+}}, metadata !{{[0-9]+}}, metadata ![[LOCAL:[0-9]+]]) // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], - // TCHECK: ret void + // TCHECK: ret void -#pragma omp target firstprivate(aa,b,c,d) +#pragma omp target firstprivate(aa, b, c, d) { aa += 1; b[2] = 1.0; c[1][2] = 1.0; d.X = 1; - d.Y = 1; + d.Y = 1; } - + // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], [10 x float]*{{.*}} [[B_IN:%.+]], [5 x [10 x double]]*{{.*}} [[C_IN:%.+]], [[TT]]*{{.*}} [[D_IN:%.+]]) // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*, @@ -58,10 +60,12 @@ int foo(int n, double *ptr) { // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]], // TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]], // TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]], - // TCHECK: [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]], + // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** % // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], + // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** % // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]], + // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** % // firstprivate(aa): a_priv = a_in @@ -74,16 +78,15 @@ int foo(int n, double *ptr) { // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[C_PRIV_BCAST]], i8* [[C_IN_BCAST]],{{.+}}) - + // firstprivate(d) // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}}) - // TCHECK: load i16, i16* [[CONV_A2ADDR]], + // TCHECK: load i16, i16* [[A2_ADDR]], - - #pragma omp target firstprivate(ptr) +#pragma omp target firstprivate(ptr) { ptr[0]++; } @@ -98,13 +101,12 @@ int foo(int n, double *ptr) { return a; } - -template<typename tx> +template <typename tx> tx ftemplate(int n) { tx a = 0; tx b[10]; -#pragma omp target firstprivate(a,b) +#pragma omp target firstprivate(a, b) { a += 1; b[2] += 1; @@ -113,13 +115,12 @@ tx ftemplate(int n) { return a; } -static -int fstatic(int n) { +static int fstatic(int n) { int a = 0; char aaa = 0; int b[10]; -#pragma omp target firstprivate(a,aaa,b) +#pragma omp target firstprivate(a, aaa, b) { a += 1; aaa += 1; @@ -129,7 +130,7 @@ int fstatic(int n) { return a; } -// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, @@ -138,9 +139,8 @@ int fstatic(int n) { // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* -// TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % // firstprivate(a): a_priv = a_in @@ -158,8 +158,8 @@ int fstatic(int n) { struct S1 { double a; - int r1(int n){ - int b = n+1; + int r1(int n) { + int b = n + 1; #pragma omp target firstprivate(b) { @@ -169,7 +169,7 @@ struct S1 { return (int)b; } - // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) + // TCHECK: define internal void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, @@ -185,9 +185,7 @@ struct S1 { // TCHECK: ret void }; - - -int bar(int n, double *ptr){ +int bar(int n, double *ptr) { int a = 0; a += foo(n, ptr); S1 S; @@ -200,15 +198,15 @@ int bar(int n, double *ptr){ // template -// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define internal void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, // TCHECK-NOT: alloca i{{[0-9]+}}, // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % // firstprivate(a) // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp new file mode 100644 index 00000000000..41ef09cfdeb --- /dev/null +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s +// expected-no-diagnostics + +int main() { + /* int(*b)[a]; */ + /* int *(**c)[a]; */ + int a; + int b[10][10]; + int c[10][10][10]; +#pragma omp target parallel firstprivate(a, b) map(tofrom \ + : c) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + } +#pragma omp target parallel firstprivate(a) map(tofrom \ + : c, b) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + } +#pragma omp target parallel map(tofrom \ + : a, c, b) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + } + return 0; +} + +// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 addrspace(1)* noalias {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + |

