diff options
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 56 | ||||
| -rw-r--r-- | clang/test/OpenMP/nvptx_allocate_codegen.cpp | 33 | 
2 files changed, 74 insertions, 15 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 46b1b0faaee..632bca6ff9d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -318,6 +318,9 @@ class CheckVarsEscapingDeclContext final          OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))        return;      VD = cast<ValueDecl>(VD->getCanonicalDecl()); +    // Use user-specified allocation. +    if (VD->hasAttrs() && VD->hasAttr<OMPAllocateDeclAttr>()) +      return;      // Variables captured by value must be globalized.      if (auto *CSI = CGF.CapturedStmtInfo) {        if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { @@ -4725,7 +4728,6 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,  Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,                                                          const VarDecl *VD) { -  bool UseDefaultAllocator = true;    if (VD && VD->hasAttr<OMPAllocateDeclAttr>()) {      const auto *A = VD->getAttr<OMPAllocateDeclAttr>();      switch (A->getAllocatorType()) { @@ -4733,17 +4735,48 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,        // threadlocal.      case OMPAllocateDeclAttr::OMPDefaultMemAlloc:      case OMPAllocateDeclAttr::OMPThreadMemAlloc: -      // Just pass-through to check if the globalization is required. -      break; -    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: -    case OMPAllocateDeclAttr::OMPCGroupMemAlloc:      case OMPAllocateDeclAttr::OMPHighBWMemAlloc:      case OMPAllocateDeclAttr::OMPLowLatMemAlloc: -    case OMPAllocateDeclAttr::OMPConstMemAlloc: -    case OMPAllocateDeclAttr::OMPPTeamMemAlloc: +      // Follow the user decision - use default allocation. +      return Address::invalid();      case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: -      UseDefaultAllocator = false; -      break; +      // TODO: implement aupport for user-defined allocators. +      return Address::invalid(); +    case OMPAllocateDeclAttr::OMPConstMemAlloc: { +      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); +      auto *GV = new llvm::GlobalVariable( +          CGM.getModule(), VarTy, /*isConstant=*/false, +          llvm::GlobalValue::InternalLinkage, +          llvm::Constant::getNullValue(VarTy), VD->getName(), +          /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, +          CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant)); +      CharUnits Align = CGM.getContext().getDeclAlign(VD); +      GV->setAlignment(Align.getQuantity()); +      return Address(GV, Align); +    } +    case OMPAllocateDeclAttr::OMPPTeamMemAlloc: { +      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); +      auto *GV = new llvm::GlobalVariable( +          CGM.getModule(), VarTy, /*isConstant=*/false, +          llvm::GlobalValue::InternalLinkage, +          llvm::Constant::getNullValue(VarTy), VD->getName(), +          /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal, +          CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); +      CharUnits Align = CGM.getContext().getDeclAlign(VD); +      GV->setAlignment(Align.getQuantity()); +      return Address(GV, Align); +    } +    case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: +    case OMPAllocateDeclAttr::OMPCGroupMemAlloc: { +      llvm::Type *VarTy = CGF.ConvertTypeForMem(VD->getType()); +      auto *GV = new llvm::GlobalVariable( +          CGM.getModule(), VarTy, /*isConstant=*/false, +          llvm::GlobalValue::InternalLinkage, +          llvm::Constant::getNullValue(VarTy), VD->getName()); +      CharUnits Align = CGM.getContext().getDeclAlign(VD); +      GV->setAlignment(Align.getQuantity()); +      return Address(GV, Align); +    }      }    } @@ -4769,11 +4802,6 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,      }    } -  // TODO: replace it with return -  // UseDefaultAllocator ? Address::invalid : -  // CGOpenMPRuntime::getAddressOfLocalVariable(CGF, VD); when NVPTX libomp -  // supports __kmpc_alloc|__kmpc_free. -  (void)UseDefaultAllocator; // Prevent a warning.    return Address::invalid();  } diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp index ec1faff4265..9a285d0d093 100644 --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -24,6 +24,8 @@ extern const omp_allocator_handle_t omp_thread_mem_alloc;  // CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0,  // CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0,  // CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32, +// CHECK-DAG: @bar_c = internal global i32 0, +// CHECK-DAG: @bar_b = internal addrspace(3) global double 0.000000e+00,  struct St{   int a;  }; @@ -64,13 +66,42 @@ int main () {  #pragma omp allocate(a) allocator(omp_thread_mem_alloc)    a=2;    double b = 3; +  float c;  #pragma omp allocate(b) allocator(omp_default_mem_alloc) +#pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)    return (foo<int>());  }  // CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}() -// CHECK: alloca i32, +// CHECK-NOT: alloca i32,  extern template int ST<int>::m; + +void baz(float &); + +// CHECK: define void @{{.+}}bar{{.+}}() +void bar() { +  // CHECK: alloca float, +  float bar_a; +  // CHECK: alloca double, +  double bar_b; +  int bar_c; +#pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc) +  // CHECK: call void [[OUTLINED:@.+]](i32* %{{.+}}, i32* %{{.+}}) +#pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc                  \ +                                                    : bar_a) allocate(omp_pteam_mem_alloc \ +                                                                      : bar_b) +  { +    bar_b = bar_a; +    baz(bar_a); +  } +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}) +// CHECK-NOT: alloca double, +// CHECK: alloca float, +// CHECK-NOT: alloca double, +// CHECK: load float, float* % +// CHECK: store double {{.+}}, double addrspace(3)* @bar_b, +} +  #pragma omp end declare target  #endif  | 

