diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2019-04-08 16:53:57 +0000 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-04-08 16:53:57 +0000 |
| commit | 1db9bfeba59b15bfb3f8592588e9aa9cd13ccd6b (patch) | |
| tree | f3ee8c20617cb457afafe3bf602e8d1a77b8867e | |
| parent | 10d6008f85237921bdd7956287d81acbe51b7dde (diff) | |
| download | bcm5719-llvm-1db9bfeba59b15bfb3f8592588e9aa9cd13ccd6b.tar.gz bcm5719-llvm-1db9bfeba59b15bfb3f8592588e9aa9cd13ccd6b.zip | |
[OPENMP][NVPTX]Fixed processing of memory management directives.
Added special processing of the memory management directives/clauses for
NVPTX target. For private locals, omp_default_mem_alloc and
omp_thread_mem_alloc result in allocation in local memory.
omp_const_mem_alloc allocates const memory, omp_teams_mem_alloc
allocates shared memory, and omp_cgroup_mem_alloc and
omp_large_cap_mem_alloc allocate global memory.
llvm-svn: 357923
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 56 | ||||
| -rw-r--r-- | clang/test/OpenMP/nvptx_allocate_codegen.cpp | 33 |
2 files changed, 74 insertions, 15 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 46b1b0faaee..632bca6ff9d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -318,6 +318,9 @@ class CheckVarsEscapingDeclContext final OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) return; VD = cast<ValueDecl>(VD->getCanonicalDecl()); + // Use user-specified allocation. + if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>()) + return; // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { @@ -4725,7 +4728,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF, Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { - bool UseDefaultAllocator = true; if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) { const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); switch (A->getAllocatorType()) { @@ -4733,17 +4735,48 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, // threadlocal. case OMPAllocateDeclAttr::OMPDefaultMemAlloc: case OMPAllocateDeclAttr::OMPThreadMemAlloc: - // Just pass-through to check if the globalization is required. - break; - case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: - case OMPAllocateDeclAttr::OMPCGroupMemAlloc: case OMPAllocateDeclAttr::OMPHighBWMemAlloc: case OMPAllocateDeclAttr::OMPLowLatMemAlloc: - case OMPAllocateDeclAttr::OMPConstMemAlloc: - case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + // Follow the user decision - use default allocation. + return Address::invalid(); case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: - UseDefaultAllocator = false; - break; + // TODO: implement aupport for user-defined allocators. + return Address::invalid(); + case OMPAllocateDeclAttr::OMPConstMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName(), + /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: { + llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); + auto *GV = new llvm::GlobalVariable( + CGM.getModule(), VarTy, /*isConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(VarTy), VD->getName()); + CharUnits Align = CGM.getContext().getDeclAlign(VD); + GV->setAlignment(Align.getQuantity()); + return Address(GV, Align); + } } } @@ -4769,11 +4802,6 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF, } } - // TODO: replace it with return - // UseDefaultAllocator ? Address::invalid : - // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp - // supports __kmpc_alloc|__kmpc_free. - (void)UseDefaultAllocator; // Prevent a warning. return Address::invalid(); } diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp index ec1faff4265..9a285d0d093 100644 --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -24,6 +24,8 @@ extern const omp_allocator_handle_t omp_thread_mem_alloc; // CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0, // CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0, // CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32, +// CHECK-DAG: @bar_c = internal global i32 0, +// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00, struct St{ int a; }; @@ -64,13 +66,42 @@ int main () { #pragma omp allocate(a) allocator(omp_thread_mem_alloc) a=2; double b = 3; + float c; #pragma omp allocate(b) allocator(omp_default_mem_alloc) +#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc) return (foo<int>()); } // CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}() -// CHECK: alloca i32, +// CHECK-NOT: alloca i32, extern template int ST<int>::m; + +void baz(float &); + +// CHECK: define void @{{.+}}bar{{.+}}() +void bar() { + // CHECK: alloca float, + float bar_a; + // CHECK: alloca double, + double bar_b; + int bar_c; +#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc) + // CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}) +#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc \ + : bar_a) allocate(omp_pteam_mem_alloc \ + : bar_b) + { + bar_b = bar_a; + baz(bar_a); + } +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) +// CHECK-NOT: alloca double, +// CHECK: alloca float, +// CHECK-NOT: alloca double, +// CHECK: load float, float* % +// CHECK: store double {{.+}}, double addrspace(3)* @bar_b, +} + #pragma omp end declare target #endif |

