diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 757 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 219 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 39 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 12 | ||||
-rw-r--r-- | clang/lib/Frontend/CompilerInvocation.cpp | 24 | ||||
-rw-r--r-- | clang/lib/Serialization/ASTReader.cpp | 7 | ||||
-rw-r--r-- | clang/lib/Serialization/ASTWriter.cpp | 7 |
7 files changed, 1024 insertions, 41 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 0ba7e0639ac..6d4fc9f64b4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -11,16 +11,19 @@ // //===----------------------------------------------------------------------===// +#include "CGCXXABI.h" +#include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" -#include "CGCleanup.h" #include "clang/AST/Decl.h" #include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -215,25 +218,31 @@ private: /// \brief API for captured statement code generation in OpenMP target /// constructs. For this captures, implicit parameters are used instead of the -/// captured fields. +/// captured fields. The name of the target region has to be unique in a given +/// application so it is provided by the client, because only the client has +/// the information to generate that. class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, - const RegionCodeGenTy &CodeGen) + const RegionCodeGenTy &CodeGen, StringRef HelperName) : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, - /*HasCancel = */ false) {} + /*HasCancel=*/false), + HelperName(HelperName) {} /// \brief This is unused for target regions because each starts executing /// with a single thread. const VarDecl *getThreadIDVariable() const override { return nullptr; } /// \brief Get the name of the capture helper. - StringRef getHelperName() const override { return ".omp_offloading."; } + StringRef getHelperName() const override { return HelperName; } static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; } + +private: + StringRef HelperName; }; /// \brief RAII for emitting code of OpenMP constructs. @@ -301,7 +310,8 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) { + : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr), + OffloadEntriesInfoManager(CGM) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, @@ -311,6 +321,8 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) llvm::PointerType::getUnqual(CGM.Int32Ty)}; Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); + + loadOffloadInfoMetadata(); } void CGOpenMPRuntime::clear() { @@ -931,6 +943,26 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_register_lib: { + // Build void __tgt_register_lib(__tgt_bin_desc *desc); + QualType ParamTy = + CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); + llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); + break; + } + case OMPRTL__tgt_unregister_lib: { + // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); + QualType ParamTy = + CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); + llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); + break; + } } return RTLFn; } @@ -1969,6 +2001,381 @@ enum KmpTaskTFields { }; } // anonymous namespace +bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { + // FIXME: Add other entries type when they become supported. + return OffloadEntriesTargetRegion.empty(); +} + +/// \brief Initialize target region entry. +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, + StringRef ParentName, unsigned LineNum, + unsigned ColNum, unsigned Order) { + assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " + "only required for the device " + "code generation."); + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = + OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); + ++OffloadingEntriesNum; +} + +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: + registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, + StringRef ParentName, unsigned LineNum, + unsigned ColNum, llvm::Constant *Addr, + llvm::Constant *ID) { + // If we are emitting code for a target, the entry is already initialized, + // only has to be registered. + if (CGM.getLangOpts().OpenMPIsDevice) { + assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, + ColNum) && + "Entry must exist."); + auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName] + [LineNum][ColNum]; + assert(Entry.isValid() && "Entry not initialized!"); + Entry.setAddress(Addr); + Entry.setID(ID); + return; + } else { + OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = + Entry; + } +} + +bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( + unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, + unsigned ColNum) const { + auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); + if (PerDevice == OffloadEntriesTargetRegion.end()) + return false; + auto PerFile = PerDevice->second.find(FileID); + if (PerFile == PerDevice->second.end()) + return false; + auto PerParentName = PerFile->second.find(ParentName); + if (PerParentName == PerFile->second.end()) + return false; + auto PerLine = PerParentName->second.find(LineNum); + if (PerLine == PerParentName->second.end()) + return false; + auto PerColumn = PerLine->second.find(ColNum); + if (PerColumn == PerLine->second.end()) + return false; + // Fail if this entry is already registered. + if (PerColumn->second.getAddress() || PerColumn->second.getID()) + return false; + return true; +} + +void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( + const OffloadTargetRegionEntryInfoActTy &Action) { + // Scan all target region entries and perform the provided action. + for (auto &D : OffloadEntriesTargetRegion) + for (auto &F : D.second) + for (auto &P : F.second) + for (auto &L : P.second) + for (auto &C : L.second) + Action(D.first, F.first, P.first(), L.first, C.first, C.second); +} + +/// \brief Create a Ctor/Dtor-like function whose body is emitted through +/// \a Codegen. This is used to emit the two functions that register and +/// unregister the descriptor of the current compilation unit. +static llvm::Function * +createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, + const RegionCodeGenTy &Codegen) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, C.VoidPtrTy); + Args.push_back(&DummyPtr); + + CodeGenFunction CGF(CGM); + GlobalDecl(); + auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, Args, FunctionType::ExtInfo(), + /*isVariadic=*/false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto *Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation()); + Codegen(CGF); + CGF.FinishFunction(); + return Fn; +} + +llvm::Function * +CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { + + // If we don't have entries or if we are emitting code for the device, we + // don't need to do anything. + if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) + return nullptr; + + auto &M = CGM.getModule(); + auto &C = CGM.getContext(); + + // Get list of devices we care about + auto &Devices = CGM.getLangOpts().OMPTargetTriples; + + // We should be creating an offloading descriptor only if there are devices + // specified. + assert(!Devices.empty() && "No OpenMP offloading devices??"); + + // Create the external variables that will point to the begin and end of the + // host entries section. These will be defined by the linker. + auto *OffloadEntryTy = + CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); + llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( + M, OffloadEntryTy, /*isConstant=*/true, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + ".omp_offloading.entries_begin"); + llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( + M, OffloadEntryTy, /*isConstant=*/true, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + ".omp_offloading.entries_end"); + + // Create all device images + llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires; + auto *DeviceImageTy = cast<llvm::StructType>( + CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); + + for (unsigned i = 0; i < Devices.size(); ++i) { + StringRef T = Devices[i].getTriple(); + auto *ImgBegin = new llvm::GlobalVariable( + M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T)); + auto *ImgEnd = new llvm::GlobalVariable( + M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, + /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T)); + + llvm::Constant *Dev = + llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, + HostEntriesBegin, HostEntriesEnd, nullptr); + DeviceImagesEntires.push_back(Dev); + } + + // Create device images global array. + llvm::ArrayType *DeviceImagesInitTy = + llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size()); + llvm::Constant *DeviceImagesInit = + llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires); + + llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable( + M, DeviceImagesInitTy, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, DeviceImagesInit, + ".omp_offloading.device_images"); + DeviceImages->setUnnamedAddr(true); + + // This is a Zero array to be used in the creation of the constant expressions + llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), + llvm::Constant::getNullValue(CGM.Int32Ty)}; + + // Create the target region descriptor. + auto *BinaryDescriptorTy = cast<llvm::StructType>( + CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy())); + llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get( + BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), + llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages, + Index), + HostEntriesBegin, HostEntriesEnd, nullptr); + + auto *Desc = new llvm::GlobalVariable( + M, BinaryDescriptorTy, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit, + ".omp_offloading.descriptor"); + + // Emit code to register or unregister the descriptor at execution + // startup or closing, respectively. + + // Create a variable to drive the registration and unregistration of the + // descriptor, so we can reuse the logic that emits Ctors and Dtors. + auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var"); + ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(), + IdentInfo, C.CharTy); + + auto *UnRegFn = createOffloadingBinaryDescriptorFunction( + CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { + CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), + Desc); + }); + auto *RegFn = createOffloadingBinaryDescriptorFunction( + CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { + CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), + Desc); + CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); + }); + return RegFn; +} + +void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, + uint64_t Size) { + auto *TgtOffloadEntryType = cast<llvm::StructType>( + CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); + llvm::LLVMContext &C = CGM.getModule().getContext(); + llvm::Module &M = CGM.getModule(); + + // Make sure the address has the right type. + llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy); + + // Create constant string with the name. + llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); + + llvm::GlobalVariable *Str = + new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, StrPtrInit, + ".omp_offloading.entry_name"); + Str->setUnnamedAddr(true); + llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); + + // Create the entry struct. + llvm::Constant *EntryInit = llvm::ConstantStruct::get( + TgtOffloadEntryType, AddrPtr, StrPtr, + llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr); + llvm::GlobalVariable *Entry = new llvm::GlobalVariable( + M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage, + EntryInit, ".omp_offloading.entry"); + + // The entry has to be created in the section the linker expects it to be. + Entry->setSection(".omp_offloading.entries"); + // We can't have any padding between symbols, so we need to have 1-byte + // alignment. + Entry->setAlignment(1); + return; +} + +void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { + // Emit the offloading entries and metadata so that the device codegen side + // can + // easily figure out what to emit. The produced metadata looks like this: + // + // !omp_offload.info = !{!1, ...} + // + // Right now we only generate metadata for function that contain target + // regions. + + // If we do not have entries, we dont need to do anything. + if (OffloadEntriesInfoManager.empty()) + return; + + llvm::Module &M = CGM.getModule(); + llvm::LLVMContext &C = M.getContext(); + SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> + OrderedEntries(OffloadEntriesInfoManager.size()); + + // Create the offloading info metadata node. + llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); + + // Auxiliar methods to create metadata values and strings. + auto getMDInt = [&](unsigned v) { + return llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v)); + }; + + auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); }; + + // Create function that emits metadata for each target region entry; + auto &&TargetRegionMetadataEmitter = [&]( + unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, + unsigned Column, + OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { + llvm::SmallVector<llvm::Metadata *, 32> Ops; + // Generate metadata for target regions. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (0). + // - Entry 1 -> Device ID of the file where the entry was identified. + // - Entry 2 -> File ID of the file where the entry was identified. + // - Entry 3 -> Mangled name of the function where the entry was identified. + // - Entry 4 -> Line in the file where the entry was identified. + // - Entry 5 -> Column in the file where the entry was identified. + // - Entry 6 -> Order the entry was created. + // The first element of the metadata node is the kind. + Ops.push_back(getMDInt(E.getKind())); + Ops.push_back(getMDInt(DeviceID)); + Ops.push_back(getMDInt(FileID)); + Ops.push_back(getMDString(ParentName)); + Ops.push_back(getMDInt(Line)); + Ops.push_back(getMDInt(Column)); + Ops.push_back(getMDInt(E.getOrder())); + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = &E; + + // Add metadata to the named metadata node. + MD->addOperand(llvm::MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( + TargetRegionMetadataEmitter); + + for (auto *E : OrderedEntries) { + assert(E && "All ordered entries must exist!"); + if (auto *CE = + dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( + E)) { + assert(CE->getID() && CE->getAddress() && + "Entry ID and Addr are invalid!"); + createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0); + } else + llvm_unreachable("Unsupported entry kind."); + } +} + +/// \brief Loads all the offload entries information from the host IR +/// metadata. +void CGOpenMPRuntime::loadOffloadInfoMetadata() { + // If we are in target mode, load the metadata from the host IR. This code has + // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). + + if (!CGM.getLangOpts().OpenMPIsDevice) + return; + + if (CGM.getLangOpts().OMPHostIRFile.empty()) + return; + + auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); + if (Buf.getError()) + return; + + llvm::LLVMContext C; + auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C); + + if (ME.getError()) + return; + + llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); + if (!MD) + return; + + for (auto I : MD->operands()) { + llvm::MDNode *MN = cast<llvm::MDNode>(I); + + auto getMDInt = [&](unsigned Idx) { + llvm::ConstantAsMetadata *V = + cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); + return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); + }; + + auto getMDString = [&](unsigned Idx) { + llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx)); + return V->getString(); + }; + + switch (getMDInt(0)) { + default: + llvm_unreachable("Unexpected metadata!"); + break; + case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: + OFFLOAD_ENTRY_INFO_TARGET_REGION: + OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( + /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), + /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), + /*Column=*/getMDInt(5), /*Order=*/getMDInt(6)); + break; + } + } +} + void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { if (!KmpRoutineEntryPtrTy) { // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. @@ -1992,6 +2399,80 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, return Field; } +QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { + + // Make sure the type of the entry is already created. This is the type we + // have to create: + // struct __tgt_offload_entry{ + // void *addr; // Pointer to the offload entry info. + // // (function or global) + // char *name; // Name of the function or global. + // size_t size; // Size of the entry info (0 if it a function). + // }; + if (TgtOffloadEntryQTy.isNull()) { + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("__tgt_offload_entry"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); + addFieldToRecordDecl(C, RD, C.getSizeType()); + RD->completeDefinition(); + TgtOffloadEntryQTy = C.getRecordType(RD); + } + return TgtOffloadEntryQTy; +} + +QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { + // These are the types we need to build: + // struct __tgt_device_image{ + // void *ImageStart; // Pointer to the target code start. + // void *ImageEnd; // Pointer to the target code end. + // // We also add the host entries to the device image, as it may be useful + // // for the target runtime to have access to that information. + // __tgt_offload_entry *EntriesBegin; // Begin of the table with all + // // the entries. + // __tgt_offload_entry *EntriesEnd; // End of the table with all the + // // entries (non inclusive). + // }; + if (TgtDeviceImageQTy.isNull()) { + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("__tgt_device_image"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + addFieldToRecordDecl(C, RD, C.VoidPtrTy); + addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); + addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); + RD->completeDefinition(); + TgtDeviceImageQTy = C.getRecordType(RD); + } + return TgtDeviceImageQTy; +} + +QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { + // struct __tgt_bin_desc{ + // int32_t NumDevices; // Number of devices supported. + // __tgt_device_image *DeviceImages; // Arrays of device images + // // (one per device). + // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the + // // entries. + // __tgt_offload_entry *EntriesEnd; // End of the table with all the + // // entries (non inclusive). + // }; + if (TgtBinaryDescriptorQTy.isNull()) { + ASTContext &C = CGM.getContext(); + auto *RD = C.buildImplicitRecord("__tgt_bin_desc"); + RD->startDefinition(); + addFieldToRecordDecl( + C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); + addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); + addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); + addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); + RD->completeDefinition(); + TgtBinaryDescriptorQTy = C.getRecordType(RD); + } + return TgtBinaryDescriptorQTy; +} + namespace { struct PrivateHelpersTy { PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, @@ -3238,20 +3719,115 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, } } -llvm::Value * -CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D, - const RegionCodeGenTy &CodeGen) { +/// \brief Obtain information that uniquely identifies a target entry. This +/// consists of the file and device IDs as well as line and column numbers +/// associated with the relevant entry source location. +static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, + unsigned &DeviceID, unsigned &FileID, + unsigned &LineNum, unsigned &ColumnNum) { + + auto &SM = C.getSourceManager(); + + // The loc should be always valid and have a file ID (the user cannot use + // #pragma directives in macros) + + assert(Loc.isValid() && "Source location is expected to be always valid."); + assert(Loc.isFileID() && "Source location is expected to refer to a file."); + + PresumedLoc PLoc = SM.getPresumedLoc(Loc); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + llvm_unreachable("Source file with target region no longer exists!"); + + DeviceID = ID.getDevice(); + FileID = ID.getFile(); + LineNum = PLoc.getLine(); + ColumnNum = PLoc.getColumn(); + return; +} + +void CGOpenMPRuntime::emitTargetOutlinedFunction( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry) { + + assert(!ParentName.empty() && "Invalid target region parent name!"); + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + // Emit target region as a standalone region. + auto &&CodeGen = [&CS](CodeGenFunction &CGF) { + CGF.EmitStmt(CS.getCapturedStmt()); + }; + + // Create a unique name for the proxy/entry function that using the source + // location information of the current target region. The name will be + // something like: + // + // .omp_offloading.DD_FFFF.PP.lBB.cCC + // + // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the + // mangled name of the function that encloses the target region, BB is the + // line number of the target region, and CC is the column number of the target + // region. + + unsigned DeviceID; + unsigned FileID; + unsigned Line; + unsigned Column; + getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, + Line, Column); + SmallString<64> EntryFnName; + { + llvm::raw_svector_ostream OS(EntryFnName); + OS << ".omp_offloading" << llvm::format(".%x", DeviceID) + << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c" + << Column; + } + CodeGenFunction CGF(CGM, true); - CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen); + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(CS); + + OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); + + // If this target outline function is not an offload entry, we don't need to + // register it. + if (!IsOffloadEntry) + return; + + // The target region ID is used by the runtime library to identify the current + // target region, so it only has to be unique and not necessarily point to + // anything. It could be the pointer to the outlined function that implements + // the target region, but we aren't using that so that the compiler doesn't + // need to keep that, and could therefore inline the host function if proven + // worthwhile during optimization. In the other hand, if emitting code for the + // device, the ID has to be the function address so that it can retrieved from + // the offloading entry and launched by the runtime library. We also mark the + // outlined function to have external linkage in case we are emitting code for + // the device, because these functions will be entry points to the device. + + if (CGM.getLangOpts().OpenMPIsDevice) { + OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); + OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else + OutlinedFnID = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id"); + + // Register the information for the entry associated with this target region. + OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); + return; } void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, + llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef<llvm::Value *> CapturedVars) { if (!CGF.HaveInsertPoint()) @@ -3275,6 +3851,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OMP_DEVICEID_UNDEF = -1, }; + assert(OutlinedFn && "Invalid outlined function!"); + auto &Ctx = CGF.getContext(); // Fill up the arrays with the all the captured variables. @@ -3373,7 +3951,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasVLACaptures, Device, OffloadError, + hasVLACaptures, Device, OutlinedFnID, OffloadError, OffloadErrorQType](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); @@ -3504,10 +4082,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // compiler doesn't need to keep that, and could therefore inline the host // function if proven worthwhile during optimization. - llvm::Value *HostPtr = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr"); + // From this point on, we need to have an ID of the target region defined. + assert(OutlinedFnID && "Invalid outlined function ID!"); // Emit device ID if any. llvm::Value *DeviceID; @@ -3518,25 +4094,35 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); llvm::Value *OffloadingArgs[] = { - DeviceID, HostPtr, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), OffloadingArgs); CGF.EmitStoreOfScalar(Return, OffloadError); }; - if (IfCond) { - // Notify that the host version must be executed. - auto &&ElseGen = [this, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), - OffloadError); - }; - emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + // Notify that the host version must be executed. + auto &&ElseGen = [this, OffloadError, + OffloadErrorQType](CodeGenFunction &CGF) { + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), + OffloadError); + }; + + // If we have a target function ID it means that we need to support + // offloading, otherwise, just execute on the host. We need to execute on host + // regardless of the conditional in the if clause if, e.g., the user do not + // specify target triples. + if (OutlinedFnID) { + if (IfCond) { + emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + } else { + CodeGenFunction::RunCleanupsScope Scope(CGF); + ThenGen(CGF); + } } else { CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenGen(CGF); + ElseGen(CGF); } // Check the error code and execute the host version if required. @@ -3553,3 +4139,120 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); return; } + +void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, + StringRef ParentName) { + if (!S) + return; + + // If we find a OMP target directive, codegen the outline function and + // register the result. + // FIXME: Add other directives with target when they become supported. + bool isTargetDirective = isa<OMPTargetDirective>(S); + + if (isTargetDirective) { + auto *E = cast<OMPExecutableDirective>(S); + unsigned DeviceID; + unsigned FileID; + unsigned Line; + unsigned Column; + getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, + FileID, Line, Column); + + // Is this a target region that should not be emitted as an entry point? If + // so just signal we are done with this target region. + if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo( + DeviceID, FileID, ParentName, Line, Column)) + return; + + llvm::Function *Fn; + llvm::Constant *Addr; + emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, + /*isOffloadEntry=*/true); + assert(Fn && Addr && "Target region emission failed."); + return; + } + + if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { + if (!E->getAssociatedStmt()) + return; + + scanForTargetRegionsFunctions( + cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(), + ParentName); + return; + } + + // If this is a lambda function, look into its body. + if (auto *L = dyn_cast<LambdaExpr>(S)) + S = L->getBody(); + + // Keep looking for target regions recursively. + for (auto *II : S->children()) + scanForTargetRegionsFunctions(II, ParentName); + + return; +} + +bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { + auto &FD = *cast<FunctionDecl>(GD.getDecl()); + + // If emitting code for the host, we do not process FD here. Instead we do + // the normal code generation. + if (!CGM.getLangOpts().OpenMPIsDevice) + return false; + + // Try to detect target regions in the function. + scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD)); + + // We should not emit any function othen that the ones created during the + // scanning. Therefore, we signal that this function is completely dealt + // with. + return true; +} + +bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { + if (!CGM.getLangOpts().OpenMPIsDevice) + return false; + + // Check if there are Ctors/Dtors in this declaration and look for target + // regions in it. We use the complete variant to produce the kernel name + // mangling. + QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); + if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { + for (auto *Ctor : RD->ctors()) { + StringRef ParentName = + CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); + scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); + } + auto *Dtor = RD->getDestructor(); + if (Dtor) { + StringRef ParentName = + CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); + scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); + } + } + + // If we are in target mode we do not emit any global (declare target is not + // implemented yet). Therefore we signal that GD was processed in this case. + return true; +} + +bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { + auto *VD = GD.getDecl(); + if (isa<FunctionDecl>(VD)) + return emitTargetFunctions(GD); + + return emitTargetGlobalVariable(GD); +} + +llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { + // If we have offloading in the current module, we need to emit the entries + // now and register the offloading descriptor. + createOffloadEntriesAndInfoMetadata(); + + // Create and register the offloading binary descriptors. This is the main + // entity that captures all the information about offloading in the current + // compilation unit. + return createOffloadingBinaryDescriptorRegistration(); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 992f9a8805e..6b04fbeb09b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -35,6 +35,7 @@ class Value; namespace clang { class Expr; +class GlobalDecl; class OMPExecutableDirective; class VarDecl; @@ -165,6 +166,10 @@ private: // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); OMPRTL__tgt_target, + // Call to void __tgt_register_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_register_lib, + // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_unregister_lib, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -288,7 +293,181 @@ private: /// } flags; /// } kmp_depend_info_t; QualType KmpDependInfoTy; + /// \brief Type struct __tgt_offload_entry{ + /// void *addr; // Pointer to the offload entry info. + /// // (function or global) + /// char *name; // Name of the function or global. + /// size_t size; // Size of the entry info (0 if it a function). + /// }; + QualType TgtOffloadEntryQTy; + /// struct __tgt_device_image{ + /// void *ImageStart; // Pointer to the target code start. + /// void *ImageEnd; // Pointer to the target code end. + /// // We also add the host entries to the device image, as it may be useful + /// // for the target runtime to have access to that information. + /// __tgt_offload_entry *EntriesBegin; // Begin of the table with all + /// // the entries. + /// __tgt_offload_entry *EntriesEnd; // End of the table with all the + /// // entries (non inclusive). + /// }; + QualType TgtDeviceImageQTy; + /// struct __tgt_bin_desc{ + /// int32_t NumDevices; // Number of devices supported. + /// __tgt_device_image *DeviceImages; // Arrays of device images + /// // (one per device). + /// __tgt_offload_entry *EntriesBegin; // Begin of the table with all the + /// // entries. + /// __tgt_offload_entry *EntriesEnd; // End of the table with all the + /// // entries (non inclusive). + /// }; + QualType TgtBinaryDescriptorQTy; + /// \brief Entity that registers the offloading constants that were emitted so + /// far. + class OffloadEntriesInfoManagerTy { + CodeGenModule &CGM; + + /// \brief Number of entries registered so far. + unsigned OffloadingEntriesNum; + + public: + /// \brief Base class of the entries info. + class OffloadEntryInfo { + public: + /// \brief Kind of a given entry. Currently, only target regions are + /// supported. + enum OffloadingEntryInfoKinds { + // Entry is a target region. + OFFLOAD_ENTRY_INFO_TARGET_REGION = 0, + // Invalid entry info. + OFFLOAD_ENTRY_INFO_INVALID = ~0u + }; + + OffloadEntryInfo() : Order(~0u), Kind(OFFLOAD_ENTRY_INFO_INVALID) {} + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order) + : Order(Order), Kind(Kind) {} + + bool isValid() const { return Order != ~0u; } + unsigned getOrder() const { return Order; } + OffloadingEntryInfoKinds getKind() const { return Kind; } + static bool classof(const OffloadEntryInfo *Info) { return true; } + + protected: + // \brief Order this entry was emitted. + unsigned Order; + + OffloadingEntryInfoKinds Kind; + }; + + /// \brief Return true if a there are no entries defined. + bool empty() const; + /// \brief Return number of entries defined so far. + unsigned size() const { return OffloadingEntriesNum; } + OffloadEntriesInfoManagerTy(CodeGenModule &CGM) + : CGM(CGM), OffloadingEntriesNum(0) {} + + /// + /// Target region entries related. + /// + /// \brief Target region entries info. + class OffloadEntryInfoTargetRegion : public OffloadEntryInfo { + // \brief Address of the entity that has to be mapped for offloading. + llvm::Constant *Addr; + // \brief Address that can be used as the ID of the entry. + llvm::Constant *ID; + + public: + OffloadEntryInfoTargetRegion() + : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, ~0u), + Addr(nullptr), ID(nullptr) {} + explicit OffloadEntryInfoTargetRegion(unsigned Order, + llvm::Constant *Addr, + llvm::Constant *ID) + : OffloadEntryInfo(OFFLOAD_ENTRY_INFO_TARGET_REGION, Order), + Addr(Addr), ID(ID) {} + + llvm::Constant *getAddress() const { return Addr; } + llvm::Constant *getID() const { return ID; } + void setAddress(llvm::Constant *V) { + assert(!Addr && "Address as been set before!"); + Addr = V; + } + void setID(llvm::Constant *V) { + assert(!ID && "ID as been set before!"); + ID = V; + } + static bool classof(const OffloadEntryInfo *Info) { + return Info->getKind() == OFFLOAD_ENTRY_INFO_TARGET_REGION; + } + }; + /// \brief Initialize target region entry. + void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, + StringRef ParentName, unsigned LineNum, + unsigned ColNum, unsigned Order); + /// \brief Register target region entry. + void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, + StringRef ParentName, unsigned LineNum, + unsigned ColNum, llvm::Constant *Addr, + llvm::Constant *ID); + /// \brief Return true if a target region entry with the provided + /// information exists. + bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, + StringRef ParentName, unsigned LineNum, + unsigned ColNum) const; + /// brief Applies action \a Action on all registered entries. + typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned, + unsigned, OffloadEntryInfoTargetRegion &)> + OffloadTargetRegionEntryInfoActTy; + void actOnTargetRegionEntriesInfo( + const OffloadTargetRegionEntryInfoActTy &Action); + + private: + // Storage for target region entries kind. The storage is to be indexed by + // file ID, device ID, parent function name, lane number, and column number. + typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion> + OffloadEntriesTargetRegionPerColumn; + typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerColumn> + OffloadEntriesTargetRegionPerLine; + typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine> + OffloadEntriesTargetRegionPerParentName; + typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerParentName> + OffloadEntriesTargetRegionPerFile; + typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerFile> + OffloadEntriesTargetRegionPerDevice; + typedef OffloadEntriesTargetRegionPerDevice OffloadEntriesTargetRegionTy; + OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; + }; + OffloadEntriesInfoManagerTy OffloadEntriesInfoManager; + + /// \brief Creates and registers offloading binary descriptor for the current + /// compilation unit. The function that does the registration is returned. + llvm::Function *createOffloadingBinaryDescriptorRegistration(); + + /// \brief Creates offloading entry for the provided address \a Addr, + /// name \a Name and size \a Size. + void createOffloadEntry(llvm::Constant *Addr, StringRef Name, uint64_t Size); + + /// \brief Creates all the offload entries in the current compilation unit + /// along with the associated metadata. + void createOffloadEntriesAndInfoMetadata(); + + /// \brief Loads all the offload entries information from the host IR + /// metadata. + void loadOffloadInfoMetadata(); + /// \brief Returns __tgt_offload_entry type. + QualType getTgtOffloadEntryQTy(); + + /// \brief Returns __tgt_device_image type. + QualType getTgtDeviceImageQTy(); + + /// \brief Returns __tgt_bin_desc type. + QualType getTgtBinaryDescriptorQTy(); + + /// \brief Start scanning from statement \a S and and emit all target regions + /// found along the way. + /// \param S Starting statement. + /// \param ParentName Name of the function declaration that is being scanned. + void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName); /// \brief Build type kmp_routine_entry_t (if not built yet). void emitKmpRoutineEntryT(QualType KmpInt32Ty); @@ -743,16 +922,24 @@ public: /// \brief Emit outilined function for 'target' directive. /// \param D Directive to emit. - /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value * - emitTargetOutlinedFunction(const OMPExecutableDirective &D, - const RegionCodeGenTy &CodeGen); + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// An oulined function may not be an entry if, e.g. the if clause always + /// evaluates to false. + virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry); /// \brief Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of /// a failure it executes the host version outlined in \a OutlinedFn. /// \param D Directive to emit. /// \param OutlinedFn Host version of the code to be offloaded. + /// \param OutlinedFnID ID of host version of the code to be offloaded. /// \param IfCond Expression evaluated in if clause associated with the target /// directive, or null if no if clause is used. /// \param Device Expression evaluated in device clause associated with the @@ -760,9 +947,31 @@ public: /// \param CapturedVars Values captured in the current region. virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, const Expr *IfCond, + llvm::Value *OutlinedFn, + llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef<llvm::Value *> CapturedVars); + + /// \brief Emit the target regions enclosed in \a GD function definition or + /// the function itself in case it is a valid device function. Returns true if + /// \a GD was dealt with successfully. + /// \param FD Function to scan. + virtual bool emitTargetFunctions(GlobalDecl GD); + + /// \brief Emit the global variable if it is a valid device global variable. + /// Returns true if \a GD was dealt with successfully. + /// \param GD Variable declaration to emit. + virtual bool emitTargetGlobalVariable(GlobalDecl GD); + + /// \brief Emit the global \a GD if it is meaningful for the target. Returns + /// if it was emitted succesfully. + /// \param GD Global to scan. + virtual bool emitTargetGlobal(GlobalDecl GD); + + /// \brief Creates the offloading descriptor in the event any target region + /// was emitted in the current module and return the function that registers + /// it. + virtual llvm::Function *emitRegistrationFunction(); }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 82011984f8e..14917c20c53 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2571,14 +2571,8 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; GenerateOpenMPCapturedVars(CS, CapturedVars); - // Emit target region as a standalone region. - auto &&CodeGen = [&CS](CodeGenFunction &CGF) { - CGF.EmitStmt(CS.getCapturedStmt()); - }; - - // Obtain the target region outlined function. - llvm::Value *Fn = - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, CodeGen); + llvm::Function *Fn = nullptr; + llvm::Constant *FnID = nullptr; // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; @@ -2593,7 +2587,34 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { Device = C->getDevice(); } - CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device, + // Check if we have an if clause whose conditional always evaluates to false + // or if we do not have any targets specified. If so the target region is not + // an offload entry point. + bool IsOffloadEntry = true; + if (IfCond) { + bool Val; + if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) + IsOffloadEntry = false; + } + if (CGM.getLangOpts().OMPTargetTriples.empty()) + IsOffloadEntry = false; + + assert(CurFuncDecl && "No parent declaration for target region!"); + StringRef ParentName; + // In case we have Ctors/Dtors we use the complete type variant to produce + // the mangling of the device outlined kernel. + if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) + ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); + else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) + ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); + else + ParentName = + CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); + + CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, + IsOffloadEntry); + + CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, CapturedVars); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 173b0dcba1c..f57785f6b58 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -375,6 +375,10 @@ void CodeGenModule::Release() { if (llvm::Function *CudaDtorFunction = CUDARuntime->makeModuleDtorFunction()) AddGlobalDtor(CudaDtorFunction); } + if (OpenMPRuntime) + if (llvm::Function *OpenMPRegistrationFunction = + OpenMPRuntime->emitRegistrationFunction()) + AddGlobalCtor(OpenMPRegistrationFunction, 0); if (PGOReader) { getModule().setMaximumFunctionCount(PGOReader->getMaximumFunctionCount()); if (PGOStats.hasDiagnostics()) @@ -1490,6 +1494,11 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + // If this is OpenMP device, check if it is legal to emit this global + // normally. + if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) + return; + // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { // Forward declarations are emitted lazily on first use. @@ -3596,6 +3605,9 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { // File-scope asm is ignored during device-side CUDA compilation. if (LangOpts.CUDA && LangOpts.CUDAIsDevice) break; + // File-scope asm is ignored during device-side OpenMP compilation. + if (LangOpts.OpenMPIsDevice) + break; auto *AD = cast<FileScopeAsmDecl>(D); getModule().appendModuleInlineAsm(AD->getAsmString()->getString()); break; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 621acca6f4e..08f762f0c21 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1795,6 +1795,30 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Opts.OpenMP = Args.hasArg(options::OPT_fopenmp); Opts.OpenMPUseTLS = Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls); + Opts.OpenMPIsDevice = + Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device); + + // Get the OpenMP target triples if any. + if (Arg *A = Args.getLastArg(options::OPT_omptargets_EQ)) { + + for (unsigned i = 0; i < A->getNumValues(); ++i) { + llvm::Triple TT(A->getValue(i)); + + if (TT.getArch() == llvm::Triple::UnknownArch) + Diags.Report(clang::diag::err_drv_invalid_omp_target) << A->getValue(i); + else + Opts.OMPTargetTriples.push_back(TT); + } + } + + // Get OpenMP host file path if any and report if a non existent file is + // found + if (Arg *A = Args.getLastArg(options::OPT_omp_host_ir_file_path)) { + Opts.OMPHostIRFile = A->getValue(); + if (!llvm::sys::fs::exists(Opts.OMPHostIRFile)) + Diags.Report(clang::diag::err_drv_omp_host_ir_file_not_found) + << Opts.OMPHostIRFile; + } // Record whether the __DEPRECATED define was requested. Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 7d88a31f44a..a279475eeaf 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4699,6 +4699,13 @@ bool ASTReader::ParseLanguageOptions(const RecordData &Record, } LangOpts.CommentOpts.ParseAllComments = Record[Idx++]; + // OpenMP offloading options. + for (unsigned N = Record[Idx++]; N; --N) { + LangOpts.OMPTargetTriples.push_back(llvm::Triple(ReadString(Record, Idx))); + } + + LangOpts.OMPHostIRFile = ReadString(Record, Idx); + return Listener.ReadLanguageOptions(LangOpts, Complain, AllowCompatibleDifferences); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 128935c5c73..0f50d7a42ea 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1323,6 +1323,13 @@ uint64_t ASTWriter::WriteControlBlock(Preprocessor &PP, } Record.push_back(LangOpts.CommentOpts.ParseAllComments); + // OpenMP offloading options. + Record.push_back(LangOpts.OMPTargetTriples.size()); + for (auto &T : LangOpts.OMPTargetTriples) + AddString(T.getTriple(), Record); + + AddString(LangOpts.OMPHostIRFile, Record); + Stream.EmitRecord(LANGUAGE_OPTIONS, Record); // Target options. |