diff options
| author | Jason Henline <jhen@google.com> | 2016-10-25 20:38:08 +0000 | 
|---|---|---|
| committer | Jason Henline <jhen@google.com> | 2016-10-25 20:38:08 +0000 | 
| commit | b3f709e10f37225ae65c1d48c4623f6abc2cac1e (patch) | |
| tree | 935df072bbc2ee7873c138dd75f04a32d96c2249 /parallel-libs | |
| parent | 209a77d8d9247b7612025a15f0b4c18bc49e66eb (diff) | |
| download | bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.tar.gz bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.zip  | |
[SE] Remove StreamExecutor
Summary:
The project has been renamed to Acxxel, so this old directory needs to
be deleted.
Reviewers: jlebar, jprice
Subscribers: beanz, mgorny, parallel_libs-commits, modocache
Differential Revision: https://reviews.llvm.org/D25964
llvm-svn: 285115
Diffstat (limited to 'parallel-libs')
51 files changed, 0 insertions, 7668 deletions
diff --git a/parallel-libs/CMakeLists.txt b/parallel-libs/CMakeLists.txt index e96b2e4a479..c1fcf45cd71 100644 --- a/parallel-libs/CMakeLists.txt +++ b/parallel-libs/CMakeLists.txt @@ -1,3 +1 @@  cmake_minimum_required(VERSION 3.1) - -add_subdirectory(streamexecutor) diff --git a/parallel-libs/streamexecutor/CMakeLists.txt b/parallel-libs/streamexecutor/CMakeLists.txt deleted file mode 100644 index b1862c5e524..00000000000 --- a/parallel-libs/streamexecutor/CMakeLists.txt +++ /dev/null @@ -1,118 +0,0 @@ -cmake_minimum_required(VERSION 3.1) - -option(STREAM_EXECUTOR_UNIT_TESTS "enable unit tests" ON) -option(STREAM_EXECUTOR_ENABLE_DOXYGEN "enable StreamExecutor doxygen" ON) -option( -    STREAM_EXECUTOR_ENABLE_CONFIG_TOOL -    "enable building streamexecutor-config tool" -    ON) -option(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM -    "enable building the CUDA StreamExecutor platform \ -(see CMake's 'FindCUDA' documentation for info on specifying the CUDA path)" -    OFF) - -configure_file( -    "include/streamexecutor/PlatformOptions.h.in" -    "include/streamexecutor/PlatformOptions.h") - -# First find includes relative to the streamexecutor top-level source path. -include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/include) -# Also look for configured headers in the top-level binary directory. 
-include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/include) - -# If we are not building as part of LLVM, build StreamExecutor as a standalone -# project using LLVM as an external library: -string( -    COMPARE -    EQUAL -    "${CMAKE_SOURCE_DIR}" -    "${CMAKE_CURRENT_SOURCE_DIR}" -    STREAM_EXECUTOR_STANDALONE) - -if(STREAM_EXECUTOR_STANDALONE) -    project(StreamExecutor) - -    find_package(LLVM REQUIRED CONFIG) -    message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") -    message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - -    include_directories(${LLVM_INCLUDE_DIRS}) -    add_definitions(${LLVM_DEFINITIONS}) - -    # If LLVM does not have RTTI, don't use it here either. -    if (NOT LLVM_ENABLE_RTTI) -        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") -    endif() - -    set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") -    list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") -    include(AddLLVM) - -    if(STREAM_EXECUTOR_UNIT_TESTS) -        enable_testing() -        find_package(GTest REQUIRED) -        include_directories(${GTEST_INCLUDE_DIRS}) -        find_package(Threads REQUIRED) -    endif() -else(NOT STREAM_EXECUTOR_STANDALONE) -    if(STREAM_EXECUTOR_UNIT_TESTS) -        include_directories( -            "${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include") -    endif() -endif(STREAM_EXECUTOR_STANDALONE) - -# Find the libraries that correspond to the LLVM components -# that we wish to use -llvm_map_components_to_libnames(llvm_libs support symbolize) - -# Insist on C++ 11 features. -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# Add warning flags. -set(CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter") - -# Check for CUDA if it is enabled. 
-if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) -    find_package(CUDA REQUIRED) -    include_directories(${CUDA_INCLUDE_DIRS}) -    find_library(CUDA_DRIVER_LIBRARY cuda) -    if(NOT CUDA_DRIVER_LIBRARY) -        message(FATAL_ERROR -            "could not find libcuda, \ -is the CUDA driver is installed on your system?") -    endif() -    set( -        STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT -        $<TARGET_OBJECTS:streamexecutor_cuda_platform>) -    set( -        STREAM_EXECUTOR_LIBCUDA_LIBRARIES -        ${CUDA_DRIVER_LIBRARY}) -endif(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) - -add_subdirectory(lib) -add_subdirectory(examples) - -if(STREAM_EXECUTOR_UNIT_TESTS) -    add_subdirectory(unittests) -endif() - -if(STREAM_EXECUTOR_ENABLE_CONFIG_TOOL ) -    add_subdirectory(tools/streamexecutor-config) -endif(STREAM_EXECUTOR_ENABLE_CONFIG_TOOL ) - -install(DIRECTORY include/ DESTINATION include) - -if (STREAM_EXECUTOR_ENABLE_DOXYGEN) -    find_package(Doxygen REQUIRED) -    configure_file(Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) -    add_custom_target( -        doc -        ${DOXYGEN_EXECUTABLE} -        ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile -        WORKING_DIRECTORY -        ${CMAKE_CURRENT_BINARY_DIR} -        COMMENT -        "Generating API documentation with Doxygen" -        VERBATIM) -endif(STREAM_EXECUTOR_ENABLE_DOXYGEN) diff --git a/parallel-libs/streamexecutor/Doxyfile.in b/parallel-libs/streamexecutor/Doxyfile.in deleted file mode 100644 index 0b237349068..00000000000 --- a/parallel-libs/streamexecutor/Doxyfile.in +++ /dev/null @@ -1,2303 +0,0 @@ -# Doxyfile 1.8.6 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] 
-# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all text -# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv -# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv -# for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING      = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME           = "StreamExecutor" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER         = - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF          = - -# With the PROJECT_LOGO tag one can specify an logo or icon that is included in -# the documentation. The maximum height of the logo should not exceed 55 pixels -# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo -# to the output directory. 
- -PROJECT_LOGO           = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY       = - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS         = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE        = English - -# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. 
- -BRIEF_MEMBER_DESC      = YES - -# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF           = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF       = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC    = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB  = NO - -# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES        = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. 
-# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH        = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH    = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES            = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF      = Yes - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. 
- -QT_AUTOBRIEF           = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS           = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a -# new page for each member. If set to NO, the documentation of a member will be -# part of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES  = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE               = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines. - -ALIASES                = - -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. 
- -TCL_SUBST              = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C  = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA   = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN   = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL   = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make -# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C -# (default is Fortran), use: inc=Fortran f=C. -# -# Note For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. 
- -EXTENSION_MAPPING      = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See http://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. - -MARKDOWN_SUPPORT       = YES - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by by putting a % sign in front of the word -# or globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT       = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT    = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT        = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. 
- -SIP_SUPPORT            = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT   = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC   = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING            = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. - -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. 
If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS  = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT   = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. - -LOOKUP_CACHE_SIZE      = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. 
Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL            = YES - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE        = NO - -# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE        = NO - -# If the EXTRACT_STATIC tag is set to YES all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC         = NO - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES  = YES - -# This flag is only useful for Objective-C code. When set to YES local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO only methods in the interface are -# included. -# The default value is: NO. - -EXTRACT_LOCAL_METHODS  = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. 
- -EXTRACT_ANON_NSPACES   = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS     = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO these classes will be included in the various overviews. This option has -# no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES     = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO these declarations will be -# included in the documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS  = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS      = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. - -INTERNAL_DOCS          = NO - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. -# The default value is: system dependent. 
- -CASE_SENSE_NAMES       = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES       = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES     = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC  = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES   = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. - -INLINE_INFO            = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS       = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. 
- -SORT_BRIEF_DOCS        = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES       = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. - -SORT_BY_SCOPE_NAME     = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING  = NO - -# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the -# todo list. 
This list is created by putting \todo commands in the -# documentation. -# The default value is: YES. - -GENERATE_TODOLIST      = YES - -# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the -# test list. This list is created by putting \test commands in the -# documentation. -# The default value is: YES. - -GENERATE_TESTLIST      = YES - -# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST       = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if <section_label> ... \endif and \cond <section_label> -# ... \endcond blocks. - -ENABLED_SECTIONS       = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES  = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES the list -# will mention the files that were used to generate the documentation. -# The default value is: YES. 
- -SHOW_USED_FILES        = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES             = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES        = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER    = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE            = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. 
This requires the bibtex tool -# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. Do not use file names with spaces, bibtex cannot handle them. See -# also \cite for info how to create references. - -CITE_BIB_FILES         = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET                  = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS               = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. - -WARN_IF_UNDOCUMENTED   = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR      = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. 
If set to NO doxygen will only warn about wrong or incomplete parameter -# documentation, but not about the absence of documentation. -# The default value is: NO. - -WARN_NO_PARAMDOC       = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT            = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). - -WARN_LOGFILE           = - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. -# Note: If this tag is empty the current directory is searched. - -INPUT                  = @CMAKE_CURRENT_SOURCE_DIR@ - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: http://www.gnu.org/software/libiconv) for the list of -# possible encodings. -# The default value is: UTF-8. 
- -INPUT_ENCODING         = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank the -# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, -# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, -# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, -# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, -# *.qsf, *.as and *.js. - -FILE_PATTERNS          = - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. - -RECURSIVE              = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE                = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS       = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS       = */examples/* */tools/* */unittests/* - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. 
The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS        = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH           = @CMAKE_CURRENT_SOURCE_DIR@/examples - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS       = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE      = YES - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH             = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# <filter> <input-file> -# -# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the -# name of an input file. Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. 
If lines are added -# or removed, the anchors will not be placed correctly. - -INPUT_FILTER           = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. - -FILTER_PATTERNS        = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER ) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES    = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. - -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. 
-# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER         = NO - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES         = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS    = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# function all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION    = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES, then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. 
- -SOURCE_TOOLTIPS        = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see http://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. -# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the config file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS              = NO - -# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. - -VERBATIM_HEADERS       = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX     = YES - -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. 
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX    = 1 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -IGNORE_PREFIX          = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML          = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT            = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION    = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file must include any scripts and style sheets -# that doxygen needs, which depends on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. 
See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_HEADER            = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER            = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. -# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET        = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- -# defined cascading style sheet that is included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. 
-# Doxygen will copy the style sheet file to the output directory. For an example -# see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET  = @CMAKE_CURRENT_SOURCE_DIR@/customdoxygen.css - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES       = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the stylesheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# http://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. -# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE    = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT    = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. 
The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA  = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP         = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS  = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: http://developer.apple.com/tools/xcode/), introduced with -# OSX 10.5 (Leopard). 
To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html -# for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET        = NO - -# This tag determines the name of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME        = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID       = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID    = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. 
- -DOCSET_PUBLISHER_NAME  = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP      = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE               = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler ( hhc.exe). If non-empty -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION           = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
- -GENERATE_CHI           = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING     = - -# The BINARY_TOC flag controls whether a binary table of contents is generated ( -# YES) or a normal table of contents ( NO) in the .chm file. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC             = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -TOC_EXPAND             = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP           = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE               = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE          = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. 
For more information please see Qt Help Project / Virtual -# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER     = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME   = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS  = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS  = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION           = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. 
The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP   = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID         = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX          = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. 
As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW      = YES - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE   = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. -# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH         = 250 - -# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW    = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -FORMULA_FONTSIZE       = 10 - -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT    = YES - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# http://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using prerendered bitmaps. Use this if you do not have LaTeX -# installed or if you want formulas to look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -USE_MATHJAX            = NO - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT         = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax.
The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from http://www.mathjax.org before deployment. -# The default value is: http://cdn.mathjax.org/mathjax/latest. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS     = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_CODEFILE       = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use <access key> + S -# (what the <access key> is depends on the OS and browser, but it is typically -# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down -# key> to jump into the search results window, the results can be navigated -# using the <cursor keys>. 
Press <Enter> to select an item or <escape> to cancel -# the search. The filter options can be selected when the cursor is inside the -# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys> -# to select a filter and <Enter> or <escape> to activate or cancel the filter -# option. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -SEARCHENGINE           = YES - -# When the SERVER_BASED_SEARCH tag is enabled the search engine will be -# implemented using a web server instead of a web client using Javascript. There -# are two flavours of web server based searching depending on the -# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for -# searching and an index file used by the script. When EXTERNAL_SEARCH is -# enabled the indexing and searching needs to be provided by external tools. See -# the section "External Indexing and Searching" for details. -# The default value is: NO. -# This tag requires that the tag SEARCHENGINE is set to YES. - -SERVER_BASED_SEARCH    = NO - -# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP -# script for searching. Instead the search results are written to an XML file -# which needs to be processed by an external indexer. Doxygen will invoke an -# external search engine pointed to by the SEARCHENGINE_URL option to obtain the -# search results. -# -# Doxygen ships with an example indexer ( doxyindexer) and search engine -# (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). -# -# See the section "External Indexing and Searching" for details. -# The default value is: NO. -# This tag requires that the tag SEARCHENGINE is set to YES. - -EXTERNAL_SEARCH        = NO - -# The SEARCHENGINE_URL should point to a search engine hosted by a web server -# which will return the search results when EXTERNAL_SEARCH is enabled. 
-# -# Doxygen ships with an example indexer (doxyindexer) and search engine -# (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). See the section "External Indexing and -# Searching" for details. -# This tag requires that the tag SEARCHENGINE is set to YES. - -SEARCHENGINE_URL       = - -# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed -# search data is written to a file for indexing by an external tool. With the -# SEARCHDATA_FILE tag the name of this file can be specified. -# The default file is: searchdata.xml. -# This tag requires that the tag SEARCHENGINE is set to YES. - -SEARCHDATA_FILE        = searchdata.xml - -# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the -# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is -# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple -# projects and redirect the results back to the right project. -# This tag requires that the tag SEARCHENGINE is set to YES. - -EXTERNAL_SEARCH_ID     = - -# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen -# projects other than the one defined by this configuration file, but that are -# all added to the same external search index. Each project needs to have a -# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id -# to a relative location where the documentation can be found. The format is: -# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ... -# This tag requires that the tag SEARCHENGINE is set to YES. - -EXTRA_SEARCH_MAPPINGS  = - -#--------------------------------------------------------------------------- -# Configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output. -# The default value is: YES.
- -GENERATE_LATEX         = YES - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: latex. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_OUTPUT           = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. -# -# Note that when enabling USE_PDFLATEX this option is only used for generating -# bitmaps for formulas in the HTML output, but not in the Makefile that is -# written to the output directory. -# The default file is: latex. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_CMD_NAME         = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate -# index for LaTeX. -# The default file is: makeindex. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -MAKEINDEX_CMD_NAME     = makeindex - -# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX -# documents. This may be useful for small projects and may help to save some -# trees in general. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -COMPACT_LATEX          = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used by the -# printer. -# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x -# 14 inches) and executive (7.25 x 10.5 inches). -# The default value is: a4. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -PAPER_TYPE             = a4 - -# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names -# that should be included in the LaTeX output. To get the times font for -# instance you can specify -# EXTRA_PACKAGES=times -# If left blank no extra packages will be included. -# This tag requires that the tag GENERATE_LATEX is set to YES. 
- -EXTRA_PACKAGES         = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. -# -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will -# replace them by respectively the title of the page, the current date and time, -# only the current date, the version number of doxygen, the project name (see -# PROJECT_NAME), or the project number (see PROJECT_NUMBER). -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_HEADER           = - -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. -# -# Note: Only use a user-defined footer if you know what you are doing! -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_FOOTER           = - -# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the LATEX_OUTPUT output -# directory. Note that the files will be copied as-is; there are no commands or -# markers available. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_EXTRA_FILES      = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is -# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will -# contain links (just like the HTML output) instead of page references. This -# makes the output suitable for online browsing using a PDF viewer. 
-# The default value is: YES. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -PDF_HYPERLINKS         = YES - -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES to get a -# higher quality PDF documentation. -# The default value is: YES. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -USE_PDFLATEX           = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode -# command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_BATCHMODE        = NO - -# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the -# index chapters (such as File Index, Compound Index, etc.) in the output. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_HIDE_INDICES     = NO - -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE      = NO - -# The LATEX_BIB_STYLE tag can be used to specify the style to use for the -# bibliography, e.g. plainnat, or ieeetr. See -# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. -# The default value is: plain. -# This tag requires that the tag GENERATE_LATEX is set to YES.
- -LATEX_BIB_STYLE        = plain - -#--------------------------------------------------------------------------- -# Configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The -# RTF output is optimized for Word 97 and may not look too pretty with other RTF -# readers/editors. -# The default value is: NO. - -GENERATE_RTF           = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: rtf. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_OUTPUT             = rtf - -# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF -# documents. This may be useful for small projects and may help to save some -# trees in general. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -COMPACT_RTF            = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will -# contain hyperlink fields. The RTF file will contain links (just like the HTML -# output) instead of page references. This makes the output suitable for online -# browsing using Word or some other Word compatible readers that support those -# fields. -# -# Note: WordPad (write) and others do not support links. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_HYPERLINKS         = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's config -# file, i.e. a series of assignments. You only have to provide replacements, -# missing definitions are set to their default value. -# -# See also section "Doxygen usage" for information on how to generate the -# default style sheet that doxygen normally uses. -# This tag requires that the tag GENERATE_RTF is set to YES. 
- -RTF_STYLESHEET_FILE    = - -# Set optional variables used in the generation of an RTF document. Syntax is -# similar to doxygen's config file. A template extensions file can be generated -# using doxygen -e rtf extensionFile. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_EXTENSIONS_FILE    = - -#--------------------------------------------------------------------------- -# Configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for -# classes and files. -# The default value is: NO. - -GENERATE_MAN           = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. A directory man3 will be created inside the directory specified by -# MAN_OUTPUT. -# The default directory is: man. -# This tag requires that the tag GENERATE_MAN is set to YES. - -MAN_OUTPUT             = man - -# The MAN_EXTENSION tag determines the extension that is added to the generated -# man pages. In case the manual section does not start with a number, the number -# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is -# optional. -# The default value is: .3. -# This tag requires that the tag GENERATE_MAN is set to YES. - -MAN_EXTENSION          = .3 - -# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it -# will generate one additional man file for each entity documented in the real -# man page(s). These additional files only source the real man page, but without -# them the man command would be unable to find the correct page. -# The default value is: NO. -# This tag requires that the tag GENERATE_MAN is set to YES. 
- -MAN_LINKS              = NO - -#--------------------------------------------------------------------------- -# Configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that -# captures the structure of the code including all documentation. -# The default value is: NO. - -GENERATE_XML           = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: xml. -# This tag requires that the tag GENERATE_XML is set to YES. - -XML_OUTPUT             = xml - -# The XML_SCHEMA tag can be used to specify a XML schema, which can be used by a -# validating XML parser to check the syntax of the XML files. -# This tag requires that the tag GENERATE_XML is set to YES. - -XML_SCHEMA             = - -# The XML_DTD tag can be used to specify a XML DTD, which can be used by a -# validating XML parser to check the syntax of the XML files. -# This tag requires that the tag GENERATE_XML is set to YES. - -XML_DTD                = - -# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program -# listings (including syntax highlighting and cross-referencing information) to -# the XML output. Note that enabling this will significantly increase the size -# of the XML output. -# The default value is: YES. -# This tag requires that the tag GENERATE_XML is set to YES. - -XML_PROGRAMLISTING     = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the DOCBOOK output -#--------------------------------------------------------------------------- - -# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files -# that can be used to generate PDF. -# The default value is: NO. 
- -GENERATE_DOCBOOK       = NO - -# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in -# front of it. -# The default directory is: docbook. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_OUTPUT         = docbook - -#--------------------------------------------------------------------------- -# Configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen -# Definitions (see http://autogen.sf.net) file that captures the structure of -# the code including all documentation. Note that this feature is still -# experimental and incomplete at the moment. -# The default value is: NO. - -GENERATE_AUTOGEN_DEF   = NO - -#--------------------------------------------------------------------------- -# Configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module -# file that captures the structure of the code including all documentation. -# -# Note that this feature is still experimental and incomplete at the moment. -# The default value is: NO. - -GENERATE_PERLMOD       = NO - -# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary -# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI -# output from the Perl module output. -# The default value is: NO. -# This tag requires that the tag GENERATE_PERLMOD is set to YES. - -PERLMOD_LATEX          = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely -# formatted so it can be parsed by a human reader. This is useful if you want to -# understand what is going on. 
On the other hand, if this tag is set to NO the -# size of the Perl module output will be much smaller and Perl will parse it -# just the same. -# The default value is: YES. -# This tag requires that the tag GENERATE_PERLMOD is set to YES. - -PERLMOD_PRETTY         = YES - -# The names of the make variables in the generated doxyrules.make file are -# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful -# so different doxyrules.make files included by the same Makefile don't -# overwrite each other's variables. -# This tag requires that the tag GENERATE_PERLMOD is set to YES. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all -# C-preprocessor directives found in the sources and include files. -# The default value is: YES. - -ENABLE_PREPROCESSING   = YES - -# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names -# in the source code. If set to NO only conditional compilation will be -# performed. Macro expansion can be done in a controlled way by setting -# EXPAND_ONLY_PREDEF to YES. -# The default value is: NO. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. - -MACRO_EXPANSION        = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then -# the macro expansion is limited to the macros specified with the PREDEFINED and -# EXPAND_AS_DEFINED tags. -# The default value is: NO. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. - -EXPAND_ONLY_PREDEF     = NO - -# If the SEARCH_INCLUDES tag is set to YES the includes files in the -# INCLUDE_PATH will be searched if a #include is found. -# The default value is: YES. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
- -SEARCH_INCLUDES        = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by the -# preprocessor. -# This tag requires that the tag SEARCH_INCLUDES is set to YES. - -INCLUDE_PATH           = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will be -# used. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. - -INCLUDE_FILE_PATTERNS  = - -# The PREDEFINED tag can be used to specify one or more macro names that are -# defined before the preprocessor is started (similar to the -D option of e.g. -# gcc). The argument of the tag is a list of macros of the form: name or -# name=definition (no spaces). If the definition and the "=" are omitted, "=1" -# is assumed. To prevent a macro definition from being undefined via #undef or -# recursively expanded use the := operator instead of the = operator. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. - -PREDEFINED             = - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this -# tag can be used to specify a list of macro names that should be expanded. The -# macro definition that is found in the sources will be used. Use the PREDEFINED -# tag if you want to use a different macro definition that overrules the -# definition found in the source code. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. - -EXPAND_AS_DEFINED      = - -# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will -# remove all references to function-like macros that are alone on a line, have an -# all uppercase name, and do not end with a semicolon. Such function macros are -# typically used for boiler-plate code, and will confuse the parser if not -# removed.
-# The default value is: YES. -# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. - -SKIP_FUNCTION_MACROS   = YES - -#--------------------------------------------------------------------------- -# Configuration options related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES tag can be used to specify one or more tag files. For each tag -# file the location of the external documentation should be added. The format of -# a tag file without this location is as follows: -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where loc1 and loc2 can be relative or absolute paths or URLs. See the -# section "Linking to external documentation" for more information about the use -# of tag files. -# Note: Each tag file must have a unique name (where the name does NOT include -# the path). If a tag file is not located in the directory in which doxygen is -# run, you must also specify the path to the tagfile here. - -TAGFILES               = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create a -# tag file that is based on the input files it reads. See section "Linking to -# external documentation" for more information about the usage of tag files. - -GENERATE_TAGFILE       = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed in the -# class index. If set to NO only the inherited external classes will be listed. -# The default value is: NO. - -ALLEXTERNALS           = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in -# the modules index. If set to NO, only the current project's groups will be -# listed. -# The default value is: YES. - -EXTERNAL_GROUPS        = YES - -# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in -# the related pages index.
If set to NO, only the current project's pages will -# be listed. -# The default value is: YES. - -EXTERNAL_PAGES         = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH              = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS         = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH            = - -# You can include diagrams made with dia in doxygen documentation. Doxygen will -# then run dia to produce the diagram and insert it in the documentation. The -# DIA_PATH tag allows you to specify the directory where the dia binary resides. -# If left empty dia is assumed to be found in the default search path. - -DIA_PATH               = - -# If set to YES, the inheritance and collaboration graphs will hide inheritance -# and usage relations if the target is undocumented or is not a class. -# The default value is: YES.
- -HIDE_UNDOC_RELATIONS   = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz (see: -# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent -# Bell Labs. The other options in this section have no effect if this option is -# set to NO. -# The default value is: NO. - -HAVE_DOT               = NO - -# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed -# to run in parallel. When set to 0 doxygen will base this on the number of -# processors available in the system. You can set it explicitly to a value -# larger than 0 to get control over the balance between CPU load and processing -# speed. -# Minimum value: 0, maximum value: 32, default value: 0. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_NUM_THREADS        = 0 - -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_FONTNAME           = Helvetica - -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_FONTSIZE           = 10 - -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. -# This tag requires that the tag HAVE_DOT is set to YES.
- -DOT_FONTPATH           = - -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -CLASS_GRAPH            = YES - -# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a -# graph for each documented class showing the direct and indirect implementation -# dependencies (inheritance, containment, and class references variables) of the -# class with other documented classes. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -COLLABORATION_GRAPH    = YES - -# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -GROUP_GRAPHS           = YES - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -UML_LOOK               = NO - -# If the UML_LOOK tag is enabled, the fields and methods are shown inside the -# class node. If there are many fields or methods and many nodes the graph may -# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the -# number of items for each type to make the size more manageable. Set this to 0 -# for no limit. Note that the threshold may be exceeded by 50% before the limit -# is enforced. So when you set the threshold to 10, up to 15 fields may appear, -# but if the number exceeds 15, the total amount of fields shown is limited to -# 10. -# Minimum value: 0, maximum value: 100, default value: 10. 
-# This tag requires that the tag HAVE_DOT is set to YES. - -UML_LIMIT_NUM_FIELDS   = 10 - -# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and -# collaboration graphs will show the relations between templates and their -# instances. -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -TEMPLATE_RELATIONS     = NO - -# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to -# YES then doxygen will generate a graph for each documented file showing the -# direct and indirect include dependencies of the file with other documented -# files. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -INCLUDE_GRAPH          = YES - -# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are -# set to YES then doxygen will generate a graph for each documented file showing -# the direct and indirect include dependencies of the file with other documented -# files. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -INCLUDED_BY_GRAPH      = YES - -# If the CALL_GRAPH tag is set to YES then doxygen will generate a call -# dependency graph for every global function or class method. -# -# Note that enabling this option will significantly increase the time of a run. -# So in most cases it will be better to enable call graphs for selected -# functions only using the \callgraph command. -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -CALL_GRAPH             = NO - -# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller -# dependency graph for every global function or class method. -# -# Note that enabling this option will significantly increase the time of a run. -# So in most cases it will be better to enable caller graphs for selected -# functions only using the \callergraph command. -# The default value is: NO. 
-# This tag requires that the tag HAVE_DOT is set to YES. - -CALLER_GRAPH           = NO - -# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical -# hierarchy of all classes instead of a textual one. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -GRAPHICAL_HIERARCHY    = YES - -# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the -# dependencies a directory has on other directories in a graphical way. The -# dependency relations are determined by the #include relations between the -# files in the directories. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -DIRECTORY_GRAPH        = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. -# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order -# to make the SVG files visible in IE 9+ (other browsers do not have this -# requirement). -# Possible values are: png, jpg, gif and svg. -# The default value is: png. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_IMAGE_FORMAT       = png - -# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to -# enable generation of interactive SVG images that allow zooming and panning. -# -# Note that this requires a modern browser other than Internet Explorer. Tested -# and working are Firefox, Chrome, Safari, and Opera. -# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make -# the SVG files visible. Older versions of IE do not have SVG support. -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -INTERACTIVE_SVG        = NO - -# The DOT_PATH tag can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. -# This tag requires that the tag HAVE_DOT is set to YES. 
- -DOT_PATH               = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the \dotfile -# command). -# This tag requires that the tag HAVE_DOT is set to YES. - -DOTFILE_DIRS           = - -# The MSCFILE_DIRS tag can be used to specify one or more directories that -# contain msc files that are included in the documentation (see the \mscfile -# command). - -MSCFILE_DIRS           = - -# The DIAFILE_DIRS tag can be used to specify one or more directories that -# contain dia files that are included in the documentation (see the \diafile -# command). - -DIAFILE_DIRS           = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes -# that will be shown in the graph. If the number of nodes in a graph becomes -# larger than this value, doxygen will truncate the graph, which is visualized -# by representing a node as a red box. Note that doxygen if the number of direct -# children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that -# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. -# Minimum value: 0, maximum value: 10000, default value: 50. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_GRAPH_MAX_NODES    = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs -# generated by dot. A depth value of 3 means that only nodes reachable from the -# root by following a path via at most 3 edges will be shown. Nodes that lay -# further from the root node will be omitted. Note that setting this option to 1 -# or 2 may greatly reduce the computation time needed for large code bases. Also -# note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. -# Minimum value: 0, maximum value: 1000, default value: 0. 
-# This tag requires that the tag HAVE_DOT is set to YES. - -MAX_DOT_GRAPH_DEPTH    = 0 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT        = NO - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) support -# this, this feature is disabled by default. -# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_MULTI_TARGETS      = YES - -# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page -# explaining the meaning of the various boxes and arrows in the dot generated -# graphs. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -GENERATE_LEGEND        = YES - -# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot -# files that are used to generate the various graphs. -# The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_CLEANUP            = YES diff --git a/parallel-libs/streamexecutor/README.txt b/parallel-libs/streamexecutor/README.txt deleted file mode 100644 index b8ec47d3d5a..00000000000 --- a/parallel-libs/streamexecutor/README.txt +++ /dev/null @@ -1,11 +0,0 @@ -StreamExecutor -============== - -StreamExecutor is a wrapper around CUDA and OpenCL (host-side) programming -models (runtimes). 
This abstraction cleanly permits host code to target either -CUDA or OpenCL devices with identically-functioning data parallel kernels. It -manages the execution of concurrent work targeting the accelerator, similar to a -host-side Executor. - -This version of StreamExecutor can be built either as a sub-project of the LLVM -project or as a standalone project depending on LLVM as an external package. diff --git a/parallel-libs/streamexecutor/customdoxygen.css b/parallel-libs/streamexecutor/customdoxygen.css deleted file mode 100644 index a40ac950e3f..00000000000 --- a/parallel-libs/streamexecutor/customdoxygen.css +++ /dev/null @@ -1,20 +0,0 @@ -body { -  background-color: #e0e0eb; -} - -div.header { -  margin-left: auto; -  margin-right: auto; -  max-width: 60em; -  padding-left: 2em; -  padding-right: 2em; -} - -div.contents { -  margin-left: auto; -  margin-right: auto; -  max-width: 60em; -  background-color: white; -  padding: 2em; -  border-radius: 1em; -} diff --git a/parallel-libs/streamexecutor/examples/CMakeLists.txt b/parallel-libs/streamexecutor/examples/CMakeLists.txt deleted file mode 100644 index cb061d5ca96..00000000000 --- a/parallel-libs/streamexecutor/examples/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_executable(cuda_saxpy_example CUDASaxpy.cpp) -target_link_libraries(cuda_saxpy_example streamexecutor) - -add_executable(host_saxpy_example HostSaxpy.cpp) -target_link_libraries(host_saxpy_example streamexecutor) diff --git a/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp b/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp deleted file mode 100644 index 6b2c59e5cd6..00000000000 --- a/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===-- CUDASaxpy.cpp - Example of CUDA saxpy with StreamExecutor API -----===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains example code demonstrating the usage of the -/// StreamExecutor API. Snippets of this file will be included as code examples -/// in documentation. Taking these examples from a real source file guarantees -/// that the examples will always compile. -/// -//===----------------------------------------------------------------------===// - -#include <algorithm> -#include <cassert> -#include <cstdlib> -#include <vector> - -#include "streamexecutor/StreamExecutor.h" - -/// [Example saxpy compiler-generated] -// Code in this namespace is generated by the compiler (e.g. clang). -// -// The name of this namespace may depend on the compiler that generated it, so -// this is just an example name. -namespace __compilergen { - -// Specialization of the streamexecutor::Kernel template class for the parameter -// types of the saxpy(float A, float *X, float *Y) kernel. -using SaxpyKernel = -    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>, -                           streamexecutor::GlobalDeviceMemory<float>>; - -// A string containing the PTX code generated by the device compiler for the -// saxpy kernel. String contents not shown here. -extern const char *SaxpyPTX; - -// A global instance of a loader spec that knows how to load the code in the -// SaxpyPTX string. -static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = []() { -  streamexecutor::MultiKernelLoaderSpec Spec; -  Spec.addCUDAPTXInMemory("saxpy", {{{2, 0}, SaxpyPTX}}); -  return Spec; -}(); - -} // namespace __compilergen -/// [Example saxpy compiler-generated] - -/// [Example saxpy host PTX] -// The PTX text for a saxpy kernel. 
-const char *__compilergen::SaxpyPTX = R"( -  .version 4.3 -  .target sm_20 -  .address_size 64 - -  .visible .entry saxpy(.param .f32 A, .param .u64 X, .param .u64 Y) { -    .reg .f32 %AValue; -    .reg .f32 %XValue; -    .reg .f32 %YValue; -    .reg .f32 %Result; - -    .reg .b64 %XBaseAddrGeneric; -    .reg .b64 %YBaseAddrGeneric; -    .reg .b64 %XBaseAddrGlobal; -    .reg .b64 %YBaseAddrGlobal; -    .reg .b64 %XAddr; -    .reg .b64 %YAddr; -    .reg .b64 %ThreadByteOffset; - -    .reg .b32 %TID; - -    ld.param.f32 %AValue, [A]; -    ld.param.u64 %XBaseAddrGeneric, [X]; -    ld.param.u64 %YBaseAddrGeneric, [Y]; -    cvta.to.global.u64 %XBaseAddrGlobal, %XBaseAddrGeneric; -    cvta.to.global.u64 %YBaseAddrGlobal, %YBaseAddrGeneric; -    mov.u32 %TID, %tid.x; -    mul.wide.u32 %ThreadByteOffset, %TID, 4; -    add.s64 %XAddr, %ThreadByteOffset, %XBaseAddrGlobal; -    add.s64 %YAddr, %ThreadByteOffset, %YBaseAddrGlobal; -    ld.global.f32 %XValue, [%XAddr]; -    ld.global.f32 %YValue, [%YAddr]; -    fma.rn.f32 %Result, %AValue, %XValue, %YValue; -    st.global.f32 [%XAddr], %Result; -    ret; -  } -)"; -/// [Example saxpy host PTX] - -int main() { -  /// [Example saxpy host main] -  namespace se = ::streamexecutor; -  namespace cg = ::__compilergen; - -  // Create some host data. -  float A = 42.0f; -  std::vector<float> HostX = {0, 1, 2, 3}; -  std::vector<float> HostY = {4, 5, 6, 7}; -  size_t ArraySize = HostX.size(); - -  // Get a device object. -  se::Platform *Platform = -      getOrDie(se::PlatformManager::getPlatformByName("CUDA")); -  if (Platform->getDeviceCount() == 0) { -    return EXIT_FAILURE; -  } -  se::Device Device = getOrDie(Platform->getDevice(0)); - -  // Load the kernel onto the device. 
-  cg::SaxpyKernel Kernel = -      getOrDie(Device.createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec)); - -  se::RegisteredHostMemory<float> RegisteredX = -      getOrDie(Device.registerHostMemory<float>(HostX)); -  se::RegisteredHostMemory<float> RegisteredY = -      getOrDie(Device.registerHostMemory<float>(HostY)); - -  // Allocate memory on the device. -  se::GlobalDeviceMemory<float> X = -      getOrDie(Device.allocateDeviceMemory<float>(ArraySize)); -  se::GlobalDeviceMemory<float> Y = -      getOrDie(Device.allocateDeviceMemory<float>(ArraySize)); - -  // Run operations on a stream. -  se::Stream Stream = getOrDie(Device.createStream()); -  Stream.thenCopyH2D(RegisteredX, X) -      .thenCopyH2D(RegisteredY, Y) -      .thenLaunch(ArraySize, 1, Kernel, A, X, Y) -      .thenCopyD2H(X, RegisteredX); -  // Wait for the stream to complete. -  se::dieIfError(Stream.blockHostUntilDone()); - -  // Process output data in HostX. -  std::vector<float> ExpectedX = {4, 47, 90, 133}; -  assert(std::equal(ExpectedX.begin(), ExpectedX.end(), HostX.begin())); -  /// [Example saxpy host main] -} diff --git a/parallel-libs/streamexecutor/examples/HostSaxpy.cpp b/parallel-libs/streamexecutor/examples/HostSaxpy.cpp deleted file mode 100644 index cf81b0ba915..00000000000 --- a/parallel-libs/streamexecutor/examples/HostSaxpy.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===-- HostSaxpy.cpp - Example of host saxpy with StreamExecutor API -----===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains example code demonstrating the usage of the -/// StreamExecutor API for a host platform. 
-/// -//===----------------------------------------------------------------------===// - -#include <algorithm> -#include <cassert> -#include <cstdio> -#include <vector> - -#include "streamexecutor/StreamExecutor.h" - -void Saxpy(float A, float *X, float *Y, size_t N) { -  for (size_t I = 0; I < N; ++I) -    X[I] = A * X[I] + Y[I]; -} - -namespace __compilergen { -using SaxpyKernel = -    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>, -                           streamexecutor::GlobalDeviceMemory<float>, size_t>; - -// Wrapper function converts argument addresses to arguments. -void SaxpyWrapper(const void *const *ArgumentAddresses) { -  Saxpy(*static_cast<const float *>(ArgumentAddresses[0]), -        *static_cast<float **>(const_cast<void *>(ArgumentAddresses[1])), -        *static_cast<float **>(const_cast<void *>(ArgumentAddresses[2])), -        *static_cast<const size_t *>(ArgumentAddresses[3])); -} - -// The wrapper function is what gets registered. -static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = []() { -  streamexecutor::MultiKernelLoaderSpec Spec; -  Spec.addHostFunction("Saxpy", SaxpyWrapper); -  return Spec; -}(); -} // namespace __compilergen - -int main() { -  namespace se = ::streamexecutor; -  namespace cg = ::__compilergen; - -  // Create some host data. -  float A = 42.0f; -  std::vector<float> HostX = {0, 1, 2, 3}; -  std::vector<float> HostY = {4, 5, 6, 7}; -  size_t ArraySize = HostX.size(); - -  // Get a device object. -  se::Platform *Platform = -      getOrDie(se::PlatformManager::getPlatformByName("host")); -  if (Platform->getDeviceCount() == 0) { -    return EXIT_FAILURE; -  } -  se::Device Device = getOrDie(Platform->getDevice(0)); - -  // Load the kernel onto the device. 
-  cg::SaxpyKernel Kernel = -      getOrDie(Device.createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec)); - -  se::RegisteredHostMemory<float> RegisteredX = -      getOrDie(Device.registerHostMemory<float>(HostX)); -  se::RegisteredHostMemory<float> RegisteredY = -      getOrDie(Device.registerHostMemory<float>(HostY)); - -  // Allocate memory on the device. -  se::GlobalDeviceMemory<float> X = -      getOrDie(Device.allocateDeviceMemory<float>(ArraySize)); -  se::GlobalDeviceMemory<float> Y = -      getOrDie(Device.allocateDeviceMemory<float>(ArraySize)); - -  // Run operations on a stream. -  se::Stream Stream = getOrDie(Device.createStream()); -  Stream.thenCopyH2D(RegisteredX, X) -      .thenCopyH2D(RegisteredY, Y) -      .thenLaunch(1, 1, Kernel, A, X, Y, ArraySize) -      .thenCopyD2H(X, RegisteredX); -  // Wait for the stream to complete. -  se::dieIfError(Stream.blockHostUntilDone()); - -  // Process output data in HostX. -  std::vector<float> ExpectedX = {4, 47, 90, 133}; -  assert(std::equal(ExpectedX.begin(), ExpectedX.end(), HostX.begin())); -} diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Device.h b/parallel-libs/streamexecutor/include/streamexecutor/Device.h deleted file mode 100644 index bf73655aea4..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/Device.h +++ /dev/null @@ -1,302 +0,0 @@ -//===-- Device.h - The Device class -----------------------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// The Device class which represents a single device of a specific platform. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_DEVICE_H -#define STREAMEXECUTOR_DEVICE_H - -#include <type_traits> - -#include "streamexecutor/Error.h" -#include "streamexecutor/HostMemory.h" -#include "streamexecutor/KernelSpec.h" -#include "streamexecutor/PlatformDevice.h" - -namespace streamexecutor { - -class Stream; - -/// A class representing a StreamExecutor device. -/// -/// Device instances are basically just pointers to the underlying -/// implementation, so they are small and can be passed around by value. -class Device { -public: -  explicit Device(PlatformDevice *PDevice); -  virtual ~Device(); - -  /// Gets the name of this device. -  std::string getName() const { return PDevice->getName(); } - -  /// Creates a kernel object for this device. -  template <typename KernelT> -  Expected<typename std::enable_if<std::is_base_of<KernelBase, KernelT>::value, -                                   KernelT>::type> -  createKernel(const MultiKernelLoaderSpec &Spec) { -    Expected<const void *> MaybeKernelHandle = PDevice->createKernel(Spec); -    if (!MaybeKernelHandle) -      return MaybeKernelHandle.takeError(); -    return KernelT(PDevice, *MaybeKernelHandle, Spec.getKernelName()); -  } - -  /// Creates a stream object for this device. -  Expected<Stream> createStream(); - -  /// Allocates an array of ElementCount entries of type T in device memory. -  template <typename T> -  Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) { -    Expected<void *> MaybeMemory = -        PDevice->allocateDeviceMemory(ElementCount * sizeof(T)); -    if (!MaybeMemory) -      return MaybeMemory.takeError(); -    return GlobalDeviceMemory<T>(this, *MaybeMemory, ElementCount); -  } - -  /// Registers a previously allocated host array of type T for asynchronous -  /// memory operations. 
-  /// -  /// Host memory registered by this function can be used for asynchronous -  /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D. -  template <typename T> -  Expected<RegisteredHostMemory<T>> -  registerHostMemory(llvm::MutableArrayRef<T> Memory) { -    if (Error E = PDevice->registerHostMemory(Memory.data(), -                                              Memory.size() * sizeof(T))) -      return std::move(E); -    return RegisteredHostMemory<T>(this, Memory.data(), Memory.size()); -  } - -  /// \anchor DeviceHostSyncCopyGroup -  /// \name Host-synchronous device memory copying functions -  /// -  /// These methods block the calling host thread while copying data to or from -  /// device memory. On the device side, these methods do not block any ongoing -  /// device calls. -  /// -  /// There are no restrictions on the host memory that is used as a source or -  /// destination in these copy methods, so there is no need to register that -  /// host memory with registerHostMemory. -  /// -  /// Each of these methods has a single template parameter, T, that specifies -  /// the type of data being copied. The ElementCount arguments specify the -  /// number of objects of type T to be copied. -  /// -  /// For ease of use, each of the methods is overloaded to take either a -  /// GlobalDeviceMemorySlice or a GlobalDeviceMemory argument in the device -  /// memory argument slots, and the GlobalDeviceMemory arguments are just -  /// converted to GlobalDeviceMemorySlice arguments internally by using -  /// GlobalDeviceMemory::asSlice. -  /// -  /// These methods perform bounds checking to make sure that the ElementCount -  /// is not too large for the source or destination. For methods that do not -  /// take an ElementCount argument, an error is returned if the source size -  /// does not exactly match the destination size. 
-  ///@{ - -  template <typename T> -  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, -                           llvm::MutableArrayRef<T> Dst, size_t ElementCount) { -    if (ElementCount > Src.getElementCount()) -      return make_error("copying too many elements, " + -                        llvm::Twine(ElementCount) + -                        ", from a device array of element count " + -                        llvm::Twine(Src.getElementCount())); -    if (ElementCount > Dst.size()) -      return make_error( -          "copying too many elements, " + llvm::Twine(ElementCount) + -          ", to a host array of element count " + llvm::Twine(Dst.size())); -    return PDevice->synchronousCopyD2H(Src.getBaseMemory().getHandle(), -                                       Src.getElementOffset() * sizeof(T), -                                       Dst.data(), 0, ElementCount * sizeof(T)); -  } - -  template <typename T> -  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, -                           llvm::MutableArrayRef<T> Dst) { -    if (Src.getElementCount() != Dst.size()) -      return make_error( -          "array size mismatch for D2H, device source has element count " + -          llvm::Twine(Src.getElementCount()) + -          " but host destination has element count " + llvm::Twine(Dst.size())); -    return synchronousCopyD2H(Src, Dst, Src.getElementCount()); -  } - -  template <typename T> -  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst, -                           size_t ElementCount) { -    return synchronousCopyD2H(Src, llvm::MutableArrayRef<T>(Dst, ElementCount), -                              ElementCount); -  } - -  template <typename T> -  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src, -                           llvm::MutableArrayRef<T> Dst, size_t ElementCount) { -    return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); -  } - -  template <typename T> -  Error synchronousCopyD2H(const 
GlobalDeviceMemory<T> &Src, -                           llvm::MutableArrayRef<T> Dst) { -    return synchronousCopyD2H(Src.asSlice(), Dst); -  } - -  template <typename T> -  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src, T *Dst, -                           size_t ElementCount) { -    return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount); -  } - -  template <typename T> -  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, -                           GlobalDeviceMemorySlice<T> Dst, -                           size_t ElementCount) { -    if (ElementCount > Src.size()) -      return make_error( -          "copying too many elements, " + llvm::Twine(ElementCount) + -          ", from a host array of element count " + llvm::Twine(Src.size())); -    if (ElementCount > Dst.getElementCount()) -      return make_error("copying too many elements, " + -                        llvm::Twine(ElementCount) + -                        ", to a device array of element count " + -                        llvm::Twine(Dst.getElementCount())); -    return PDevice->synchronousCopyH2D( -        Src.data(), 0, Dst.getBaseMemory().getHandle(), -        Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T)); -  } - -  template <typename T> -  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, -                           GlobalDeviceMemorySlice<T> Dst) { -    if (Src.size() != Dst.getElementCount()) -      return make_error( -          "array size mismatch for H2D, host source has element count " + -          llvm::Twine(Src.size()) + -          " but device destination has element count " + -          llvm::Twine(Dst.getElementCount())); -    return synchronousCopyH2D(Src, Dst, Dst.getElementCount()); -  } - -  template <typename T> -  Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst, -                           size_t ElementCount) { -    return synchronousCopyH2D(llvm::ArrayRef<T>(Src, ElementCount), Dst, -                              ElementCount); -  } - 
-  template <typename T> -  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst, -                           size_t ElementCount) { -    return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); -  } - -  template <typename T> -  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst) { -    return synchronousCopyH2D(Src, Dst.asSlice()); -  } - -  template <typename T> -  Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> &Dst, -                           size_t ElementCount) { -    return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount); -  } - -  template <typename T> -  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, -                           GlobalDeviceMemorySlice<T> Dst, -                           size_t ElementCount) { -    if (ElementCount > Src.getElementCount()) -      return make_error("copying too many elements, " + -                        llvm::Twine(ElementCount) + -                        ", from a device array of element count " + -                        llvm::Twine(Src.getElementCount())); -    if (ElementCount > Dst.getElementCount()) -      return make_error("copying too many elements, " + -                        llvm::Twine(ElementCount) + -                        ", to a device array of element count " + -                        llvm::Twine(Dst.getElementCount())); -    return PDevice->synchronousCopyD2D( -        Src.getBaseMemory().getHandle(), Src.getElementOffset() * sizeof(T), -        Dst.getBaseMemory().getHandle(), Dst.getElementOffset() * sizeof(T), -        ElementCount * sizeof(T)); -  } - -  template <typename T> -  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, -                           GlobalDeviceMemorySlice<T> Dst) { -    if (Src.getElementCount() != Dst.getElementCount()) -      return make_error( -          "array size mismatch for D2D, device source has element count " + -          llvm::Twine(Src.getElementCount()) + -          " but device 
destination has element count " + -          llvm::Twine(Dst.getElementCount())); -    return synchronousCopyD2D(Src, Dst, Src.getElementCount()); -  } - -  template <typename T> -  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src, -                           GlobalDeviceMemorySlice<T> Dst, -                           size_t ElementCount) { -    return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount); -  } - -  template <typename T> -  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src, -                           GlobalDeviceMemorySlice<T> Dst) { -    return synchronousCopyD2D(Src.asSlice(), Dst); -  } - -  template <typename T> -  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, -                           GlobalDeviceMemory<T> &Dst, size_t ElementCount) { -    return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount); -  } - -  template <typename T> -  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src, -                           GlobalDeviceMemory<T> &Dst) { -    return synchronousCopyD2D(Src, Dst.asSlice()); -  } - -  template <typename T> -  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src, -                           GlobalDeviceMemory<T> &Dst, size_t ElementCount) { -    return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount); -  } - -  template <typename T> -  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src, -                           GlobalDeviceMemory<T> &Dst) { -    return synchronousCopyD2D(Src.asSlice(), Dst.asSlice()); -  } - -  ///@} End host-synchronous device memory copying functions - -private: -  // Only a GlobalDeviceMemoryBase may free device memory. -  friend GlobalDeviceMemoryBase; -  Error freeDeviceMemory(const GlobalDeviceMemoryBase &Memory) { -    return PDevice->freeDeviceMemory(Memory.getHandle()); -  } - -  // Only destroyRegisteredHostMemoryInternals may unregister host memory. 
-  friend void internal::destroyRegisteredHostMemoryInternals(Device *, void *); -  Error unregisterHostMemory(const void *Pointer) { -    return PDevice->unregisterHostMemory(Pointer); -  } - -  PlatformDevice *PDevice; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_DEVICE_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h b/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h deleted file mode 100644 index 62f6e579933..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h +++ /dev/null @@ -1,278 +0,0 @@ -//===-- DeviceMemory.h - Types representing device memory -------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines types that represent device memory buffers. Two memory -/// spaces are represented here: global and shared. Host code can have a handle -/// to device global memory, and that handle can be used to copy data to and -/// from the device. Host code cannot have a handle to device shared memory -/// because that memory only exists during the execution of a kernel. -/// -/// GlobalDeviceMemory<T> is a handle to an array of elements of type T in -/// global device memory. It is similar to a pair of a std::unique_ptr<T> and an -/// element count to tell how many elements of type T fit in the memory pointed -/// to by that T*. -/// -/// SharedDeviceMemory<T> is just the size in elements of an array of elements -/// of type T in device shared memory. No resources are actually attached to -/// this class, it is just like a memo to the device to allocate space in shared -/// memory. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_DEVICEMEMORY_H -#define STREAMEXECUTOR_DEVICEMEMORY_H - -#include <cassert> -#include <cstddef> - -#include "streamexecutor/Error.h" - -namespace streamexecutor { - -class Device; - -template <typename ElemT> class GlobalDeviceMemory; - -/// Reference to a slice of device memory. -/// -/// Contains a base memory handle, an element count offset into that base -/// memory, and an element count for the size of the slice. -template <typename ElemT> class GlobalDeviceMemorySlice { -public: -  using ElementTy = ElemT; - -  /// Intentionally implicit so GlobalDeviceMemory<T> can be passed to functions -  /// expecting GlobalDeviceMemorySlice<T> arguments. -  GlobalDeviceMemorySlice(const GlobalDeviceMemory<ElemT> &Memory) -      : BaseMemory(Memory), ElementOffset(0), -        ElementCount(Memory.getElementCount()) {} - -  GlobalDeviceMemorySlice(const GlobalDeviceMemory<ElemT> &BaseMemory, -                          size_t ElementOffset, size_t ElementCount) -      : BaseMemory(BaseMemory), ElementOffset(ElementOffset), -        ElementCount(ElementCount) { -    assert(ElementOffset + ElementCount <= BaseMemory.getElementCount() && -           "slicing past the end of a GlobalDeviceMemory buffer"); -  } - -  /// Gets the GlobalDeviceMemory backing this slice. -  const GlobalDeviceMemory<ElemT> &getBaseMemory() const { return BaseMemory; } - -  /// Gets the offset of this slice from the base memory. -  /// -  /// The offset is measured in elements, not bytes. -  size_t getElementOffset() const { return ElementOffset; } - -  /// Gets the number of elements in this slice. -  size_t getElementCount() const { return ElementCount; } - -  /// Returns the number of bytes that can fit in this slice. -  size_t getByteCount() const { return ElementCount * sizeof(ElemT); } - -  /// Creates a slice of the memory with the first DropCount elements removed. 
-  LLVM_ATTRIBUTE_UNUSED_RESULT -  GlobalDeviceMemorySlice<ElemT> slice(size_t DropCount) const { -    assert(DropCount <= ElementCount && -           "dropping more than the size of a slice"); -    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset + DropCount, -                                          ElementCount - DropCount); -  } - -  /// Creates a slice of the memory with the last DropCount elements removed. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  GlobalDeviceMemorySlice<ElemT> drop_back(size_t DropCount) const { -    assert(DropCount <= ElementCount && -           "dropping more than the size of a slice"); -    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset, -                                          ElementCount - DropCount); -  } - -  /// Creates a slice of the memory that chops off the first DropCount elements -  /// and keeps the next TakeCount elements. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  GlobalDeviceMemorySlice<ElemT> slice(size_t DropCount, -                                       size_t TakeCount) const { -    assert(DropCount + TakeCount <= ElementCount && -           "sub-slice operation overruns slice bounds"); -    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset + DropCount, -                                          TakeCount); -  } - -private: -  const GlobalDeviceMemory<ElemT> &BaseMemory; -  size_t ElementOffset; -  size_t ElementCount; -}; - -/// Wrapper around a generic global device memory allocation. -/// -/// This class represents a buffer of untyped bytes in the global memory space -/// of a device. See GlobalDeviceMemory<T> for the corresponding type that -/// includes type information for the elements in its buffer. -/// -/// This is effectively a pair consisting of an opaque handle and a buffer size -/// in bytes. The opaque handle is a platform-dependent handle to the actual -/// memory that is allocated on the device. 
-/// -/// In some cases, such as in the CUDA platform, the opaque handle may actually -/// be a pointer in the virtual address space and it may be valid to perform -/// arithmetic on it to obtain other device pointers, but this is not the case -/// in general. -/// -/// For example, in the OpenCL platform, the handle is a pointer to a _cl_mem -/// handle object which really is completely opaque to the user. -class GlobalDeviceMemoryBase { -public: -  /// Returns an opaque handle to the underlying memory. -  const void *getHandle() const { return Handle; } - -  /// Returns the address of the opaque handle as stored by this object. -  const void *const *getHandleAddress() const { return &Handle; } - -  // Cannot copy because the handle must be owned by a single object. -  GlobalDeviceMemoryBase(const GlobalDeviceMemoryBase &) = delete; -  GlobalDeviceMemoryBase &operator=(const GlobalDeviceMemoryBase &) = delete; - -protected: -  /// Creates a GlobalDeviceMemoryBase from a handle and a byte count. -  GlobalDeviceMemoryBase(Device *D, const void *Handle, size_t ByteCount) -      : TheDevice(D), Handle(Handle), ByteCount(ByteCount) {} - -  /// Transfer ownership of the underlying handle. -  GlobalDeviceMemoryBase(GlobalDeviceMemoryBase &&Other) noexcept -      : TheDevice(Other.TheDevice), Handle(Other.Handle), -        ByteCount(Other.ByteCount) { -    Other.TheDevice = nullptr; -    Other.Handle = nullptr; -    Other.ByteCount = 0; -  } - -  GlobalDeviceMemoryBase &operator=(GlobalDeviceMemoryBase &&Other) noexcept { -    TheDevice = Other.TheDevice; -    Handle = Other.Handle; -    ByteCount = Other.ByteCount; -    Other.TheDevice = nullptr; -    Other.Handle = nullptr; -    Other.ByteCount = 0; -    return *this; -  } - -  ~GlobalDeviceMemoryBase(); - -  Device *TheDevice;  // Pointer to the device on which this memory lives. -  const void *Handle; // Platform-dependent value representing allocated memory. -  size_t ByteCount;   // Size in bytes of this allocation. 
-}; - -/// Typed wrapper around the "void *"-like GlobalDeviceMemoryBase class. -/// -/// For example, GlobalDeviceMemory<int> is a simple wrapper around -/// GlobalDeviceMemoryBase that represents a buffer of integers stored in global -/// device memory. -template <typename ElemT> -class GlobalDeviceMemory : public GlobalDeviceMemoryBase { -public: -  using ElementTy = ElemT; - -  GlobalDeviceMemory(GlobalDeviceMemory &&) noexcept; -  GlobalDeviceMemory &operator=(GlobalDeviceMemory &&) noexcept; - -  /// Returns the number of elements of type ElemT that constitute this -  /// allocation. -  size_t getElementCount() const { return ByteCount / sizeof(ElemT); } - -  /// Returns the number of bytes that can fit in this memory buffer. -  size_t getByteCount() const { return ByteCount; } - -  /// Converts this memory object into a slice. -  GlobalDeviceMemorySlice<ElemT> asSlice() const { -    return GlobalDeviceMemorySlice<ElemT>(*this); -  } - -private: -  GlobalDeviceMemory(const GlobalDeviceMemory &) = delete; -  GlobalDeviceMemory &operator=(const GlobalDeviceMemory &) = delete; - -  // Only a Device can create a GlobalDeviceMemory instance. -  friend Device; -  GlobalDeviceMemory(Device *D, const void *Handle, size_t ElementCount) -      : GlobalDeviceMemoryBase(D, Handle, ElementCount * sizeof(ElemT)) {} -}; - -template <typename ElemT> -GlobalDeviceMemory<ElemT>::GlobalDeviceMemory( -    GlobalDeviceMemory<ElemT> &&) noexcept = default; - -template <typename ElemT> -GlobalDeviceMemory<ElemT> &GlobalDeviceMemory<ElemT>:: -operator=(GlobalDeviceMemory<ElemT> &&) noexcept = default; - -/// A class to represent the size of a dynamic shared memory buffer of elements -/// of type T on a device. -/// -/// Shared memory buffers exist only on the device and cannot be manipulated -/// from the host, so instances of this class do not have an opaque handle, only -/// a size. 
-/// -/// This type of memory is called "local" memory in OpenCL and "shared" memory -/// in CUDA, and both platforms follow the rule that the host code only knows -/// the size of these buffers and does not have a handle to them. -/// -/// The treatment of shared memory in StreamExecutor matches the way it is done -/// in OpenCL, where a kernel takes any number of shared memory sizes as kernel -/// function arguments. -/// -/// In CUDA only one shared memory size argument is allowed per kernel call. -/// StreamExecutor handles this by allowing CUDA kernel signatures that take -/// multiple SharedDeviceMemory arguments, and simply adding together all the -/// shared memory sizes to get the final shared memory size that is used to -/// launch the kernel. -template <typename ElemT> class SharedDeviceMemory { -public: -  /// Creates a typed area of shared device memory with a given number of -  /// elements. -  static SharedDeviceMemory<ElemT> makeFromElementCount(size_t ElementCount) { -    return SharedDeviceMemory(ElementCount); -  } - -  /// Copyable because it is just an array size. -  SharedDeviceMemory(const SharedDeviceMemory &) = default; - -  /// Copy-assignable because it is just an array size. -  SharedDeviceMemory &operator=(const SharedDeviceMemory &) = default; - -  /// Returns the number of elements of type ElemT that can fit in this memory -  /// buffer. -  size_t getElementCount() const { return ElementCount; } - -  /// Returns the number of bytes that can fit in this memory buffer. -  size_t getByteCount() const { return ElementCount * sizeof(ElemT); } - -  /// Returns whether this is a single-element memory buffer. -  bool isScalar() const { return getElementCount() == 1; } - -private: -  /// Constructs a SharedDeviceMemory instance from an element count. -  /// -  /// This constructor is not public because there is a potential for confusion -  /// between the size of the buffer in bytes and the size of the buffer in -  /// elements. 
-  /// -  /// The static method makeFromElementCount is provided for users of this class -  /// because its name makes the meaning of the size parameter clear. -  explicit SharedDeviceMemory(size_t ElementCount) -      : ElementCount(ElementCount) {} - -  size_t ElementCount; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_DEVICEMEMORY_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Error.h b/parallel-libs/streamexecutor/include/streamexecutor/Error.h deleted file mode 100644 index d33a5a6a79a..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/Error.h +++ /dev/null @@ -1,215 +0,0 @@ -//===-- Error.h - Error handling --------------------------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Error types used in the public API and internally in StreamExecutor. -/// -/// StreamExecutor's error handling is based on the types streamexecutor::Error -/// and streamexecutor::Expected<T>. -/// -/// -/// \section error The Error Class -/// -/// The Error class either represents success or contains an error message -/// describing the cause of the error. Error instances are created by calling -/// Error::success for successes or make_error for errors. -/// -/// \code{.cpp} -/// Error achieveWorldPeace() { -///   if (WorldPeaceAlreadyAchieved) { -///     return Error::success(); -///   } else { -///     return make_error("Can't someone else do it?"); -///   } -/// } -/// \endcode -/// -/// Error instances are implicitly convertible to bool. Error values convert to -/// true and successes convert to false. 
Error instances must have their boolean -/// values checked or they must be moved before they go out of scope, otherwise -/// their destruction will cause the program to abort with a warning about an -/// unchecked Error. -/// -/// If the Error represents success, then checking the boolean value is all that -/// is required, but if the Error represents a real error, the Error value must -/// be consumed. The function consumeAndGetMessage is the way to extract the -/// error message from an Error and consume the Error at the same time, so -/// typical error handling will first check whether there was an error and then -/// extract the error message if so. Here is an example: -/// -/// \code{.cpp} -/// if (Error E = achieveWorldPeace()) { -///   printf("An error occurred: %s\n", -///          consumeAndGetMessage(std::move(E)).c_str()); -///   exit(EXIT_FAILURE); -/// } -/// \endcode -/// -/// It is also common to simply pass an error along up the call stack if it -/// cannot be handled in the current function. -/// -/// \code{.cpp} -/// Error doTask() { -///   if (Error E = achieveWorldPeace()) { -///     return E; -///   } -///   ... -/// } -/// \endcode -/// -/// There is also a function consumeError that consumes an error value without -/// fetching the error message. This is useful when we want to ignore an error. -/// -/// The dieIfError function is also provided for quick-and-dirty error handling. -/// -/// -/// \section expected The Expected Class -/// -/// The Expected<T> class either represents a value of type T or an Error. 
-/// Expected<T> has one constructor that takes a T value and another constructor -/// that takes an Error rvalue reference, so Expected instances can be -/// constructed either from values or from errors: -/// -/// \code{.cpp} -/// Expected<int> getMyFavoriteInt() { -///   int MyFavorite = 42; -///   if (IsThereAFavorite) { -///     return MyFavorite; -///   } else { -///     return make_error("I don't have a favorite"); -///   } -/// } -/// \endcode -/// -/// Expected<T> instances are implicitly convertible to bool and are true if -/// they contain a value and false if they contain an error. Note that this is -/// the opposite convention of the Error type conversion to bool, where true -/// meant error and false meant success. -/// -/// If the Expected<T> instance is not an error, the stored value can be -/// obtained by using operator*. If access to members of the value are desired -/// instead of the value itself, operator-> can be used as well. -/// -/// Expected<T> instances must have their boolean value checked or they must be -/// moved before they go out of scope, otherwise they will cause the program to -/// abort with a warning about an unchecked error. If the Expected<T> instance -/// contains a value, then checking the boolean value is all that is required, -/// but if it contains an Error object, that Error object must be handled by -/// calling Expected<T>::takeError() to get the underlying error. 
-/// -/// Here is an example of the use of an Expected<T> value returned from a -/// function: -/// -/// \code{.cpp} -/// Expected<int> ExpectedInt = getMyFavoriteInt(); -/// if (ExpectedInt) { -///   printf("My favorite integer is %d\n", *ExpectedInt); -/// } else { -///   printf("An error occurred: %s\n", -///     consumeAndGetMessage(ExpectedInt.takeError()).c_str()); -///   exit(EXIT_FAILURE); -/// } -/// \endcode -/// -/// The following snippet shows some examples of how Errors and Expected values -/// can be passed up the stack if they should not be handled in the current -/// function. -/// -/// \code{.cpp} -/// Expected<double> doTask3() { -///   Error WorldPeaceError = achieveWorldPeace(); -///   if (WorldPeaceError) { -///     return WorldPeaceError; -///   } -/// -///   Expected<martian> ExpectedMartian = getMyFavoriteMartian(); -///   if (!ExpectedMartian) { -///     // Must extract the error because martian cannot be converted to double. -///     return ExpectedMartian.takeError(); -///   } -/// -///   // It's fine to return Expected<int> for Expected<double> because int can -///   // be converted to double. -///   return getMyFavoriteInt(); -/// } -/// \endcode -/// -/// The getOrDie function is also available for quick-and-dirty error handling. -/// -/// -/// \section llvm Relation to llvm::Error and llvm::Expected -/// -/// The streamexecutor::Error and streamexecutor::Expected classes are actually -/// just their LLVM counterparts redeclared in the streamexecutor namespace, but -/// they should be treated as separate types, even so. -/// -/// StreamExecutor does not support any underlying llvm::ErrorInfo class except -/// the one it defines internally for itself, so a streamexecutor::Error can be -/// thought of as a restricted llvm::Error that is guaranteed to hold a specific -/// error type. 
-/// -/// Although code may compile if llvm functions used to handle these -/// StreamExecutor error types, it is likely that code will lead to runtime -/// errors, so it is strongly recommended that only the functions from the -/// streamexecutor namespace are used on these StreamExecutor error types. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_ERROR_H -#define STREAMEXECUTOR_ERROR_H - -#include <cstdio> -#include <cstdlib> -#include <memory> -#include <string> - -#include "llvm/Support/Error.h" - -namespace streamexecutor { - -using llvm::consumeError; -using llvm::Error; -using llvm::Expected; -using llvm::Twine; - -/// Makes an Error object from an error message. -Error make_error(const Twine &Message); - -/// Consumes the input error and returns its error message. -/// -/// Assumes the input was created by the make_error function above. -std::string consumeAndGetMessage(Error &&E); - -/// Extracts the T value from an Expected<T> or prints an error message to -/// stderr and exits the program with code EXIT_FAILURE if the Expected<T> is an -/// error. -/// -/// This function and the dieIfError function are provided for applications that -/// are OK with aborting the program if an error occurs, and which don't have -/// any special error logging needs. Applications with different error handling -/// needs will likely want to declare their own functions with similar -/// signatures but which log error messages in a different way or attempt to -/// recover from errors instead of aborting the program. -template <typename T> T getOrDie(Expected<T> &&E) { -  if (!E) { -    std::fprintf(stderr, "Error extracting an expected value: %s.\n", -                 consumeAndGetMessage(E.takeError()).c_str()); -    std::exit(EXIT_FAILURE); -  } -  return std::move(*E); -} - -/// Prints an error message to stderr and exits the program with code -/// EXIT_FAILURE if the input is an error. 
-/// -/// \sa getOrDie -void dieIfError(Error &&E); - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_ERROR_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/HostMemory.h b/parallel-libs/streamexecutor/include/streamexecutor/HostMemory.h deleted file mode 100644 index 18ff184ba68..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/HostMemory.h +++ /dev/null @@ -1,195 +0,0 @@ -//===-- HostMemory.h - Types for registered host memory ---------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// -/// This file defines types that represent registered host memory buffers. Host -/// memory must be registered to participate in asynchronous copies to or from -/// device memory. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_HOSTMEMORY_H -#define STREAMEXECUTOR_HOSTMEMORY_H - -#include <cassert> -#include <cstddef> -#include <type_traits> - -#include "llvm/ADT/ArrayRef.h" - -namespace streamexecutor { - -class Device; -template <typename ElemT> class RegisteredHostMemory; - -/// A mutable slice of registered host memory. -/// -/// The memory is registered in the sense of -/// streamexecutor::Device::registerHostMemory. -/// -/// Holds a reference to an underlying registered host memory buffer. Must not -/// be used after the underlying buffer is freed or unregistered. 
-template <typename ElemT> class MutableRegisteredHostMemorySlice { -public: -  using ElementTy = ElemT; - -  MutableRegisteredHostMemorySlice(RegisteredHostMemory<ElemT> &Registered) -      : MutableArrayRef(Registered.getPointer(), Registered.getElementCount()) { -  } - -  ElemT *getPointer() const { return MutableArrayRef.data(); } -  size_t getElementCount() const { return MutableArrayRef.size(); } - -  /// Chops off the first DropCount elements of the slice. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  MutableRegisteredHostMemorySlice slice(size_t DropCount) const { -    return MutableRegisteredHostMemorySlice(MutableArrayRef.slice(DropCount)); -  } - -  /// Chops off the first DropCount elements of the slice and keeps the next -  /// TakeCount elements. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  MutableRegisteredHostMemorySlice slice(size_t DropCount, -                                         size_t TakeCount) const { -    return MutableRegisteredHostMemorySlice( -        MutableArrayRef.slice(DropCount, TakeCount)); -  } - -  /// Chops off the last DropCount elements of the slice. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  MutableRegisteredHostMemorySlice drop_back(size_t DropCount) const { -    return MutableRegisteredHostMemorySlice( -        MutableArrayRef.drop_back(DropCount)); -  } - -private: -  MutableRegisteredHostMemorySlice(llvm::MutableArrayRef<ElemT> MutableArrayRef) -      : MutableArrayRef(MutableArrayRef) {} - -  llvm::MutableArrayRef<ElemT> MutableArrayRef; -}; - -/// An immutable slice of registered host memory. -/// -/// The memory is registered in the sense of -/// streamexecutor::Device::registerHostMemory. -/// -/// Holds a reference to an underlying registered host memory buffer. Must not -/// be used after the underlying buffer is freed or unregistered. 
-template <typename ElemT> class RegisteredHostMemorySlice { -public: -  using ElementTy = ElemT; - -  RegisteredHostMemorySlice(const RegisteredHostMemory<ElemT> &Registered) -      : ArrayRef(Registered.getPointer(), Registered.getElementCount()) {} - -  RegisteredHostMemorySlice( -      MutableRegisteredHostMemorySlice<ElemT> MutableSlice) -      : ArrayRef(MutableSlice.getPointer(), MutableSlice.getElementCount()) {} - -  const ElemT *getPointer() const { return ArrayRef.data(); } -  size_t getElementCount() const { return ArrayRef.size(); } - -  /// Chops off the first N elements of the slice. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  RegisteredHostMemorySlice slice(size_t N) const { -    return RegisteredHostMemorySlice(ArrayRef.slice(N)); -  } - -  /// Chops off the first N elements of the slice and keeps the next M elements. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  RegisteredHostMemorySlice slice(size_t N, size_t M) const { -    return RegisteredHostMemorySlice(ArrayRef.slice(N, M)); -  } - -  /// Chops off the last N elements of the slice. -  LLVM_ATTRIBUTE_UNUSED_RESULT -  RegisteredHostMemorySlice drop_back(size_t N) const { -    return RegisteredHostMemorySlice(ArrayRef.drop_back(N)); -  } - -private: -  llvm::ArrayRef<ElemT> ArrayRef; -}; - -namespace internal { - -/// Helper function to unregister host memory. -/// -/// This is a thin wrapper around streamexecutor::Device::unregisterHostMemory. -/// It is defined so this operation can be performed from the destructor of the -/// template class RegisteredHostMemory without including Device.h in this -/// header and creating a header inclusion cycle. -void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer); - -} // namespace internal - -/// Registered host memory that knows how to unregister itself upon destruction. -/// -/// The memory is registered in the sense of -/// streamexecutor::Device::registerHostMemory. -/// -/// ElemT is the type of element stored in the host buffer. 
-template <typename ElemT> class RegisteredHostMemory { -public: -  using ElementTy = ElemT; - -  RegisteredHostMemory(Device *TheDevice, ElemT *Pointer, size_t ElementCount) -      : TheDevice(TheDevice), Pointer(Pointer), ElementCount(ElementCount) { -    assert(TheDevice != nullptr && "cannot construct a " -                                   "RegisteredHostMemoryBase with a null " -                                   "platform device"); -  } - -  RegisteredHostMemory(const RegisteredHostMemory &) = delete; -  RegisteredHostMemory &operator=(const RegisteredHostMemory &) = delete; - -  RegisteredHostMemory(RegisteredHostMemory &&Other) noexcept -      : TheDevice(Other.TheDevice), Pointer(Other.Pointer), -        ElementCount(Other.ElementCount) { -    Other.TheDevice = nullptr; -    Other.Pointer = nullptr; -  } - -  RegisteredHostMemory &operator=(RegisteredHostMemory &&Other) noexcept { -    TheDevice = Other.TheDevice; -    Pointer = Other.Pointer; -    ElementCount = Other.ElementCount; -    Other.TheDevice = nullptr; -    Other.Pointer = nullptr; -  } - -  ~RegisteredHostMemory() { -    internal::destroyRegisteredHostMemoryInternals(TheDevice, Pointer); -  } - -  ElemT *getPointer() { return static_cast<ElemT *>(Pointer); } -  const ElemT *getPointer() const { return static_cast<ElemT *>(Pointer); } -  size_t getElementCount() const { return ElementCount; } - -  /// Creates an immutable slice for the entire contents of this memory. -  RegisteredHostMemorySlice<ElemT> asSlice() const { -    return RegisteredHostMemorySlice<ElemT>(*this); -  } - -  /// Creates a mutable slice for the entire contents of this memory. 
-  MutableRegisteredHostMemorySlice<ElemT> asSlice() { -    return MutableRegisteredHostMemorySlice<ElemT>(*this); -  } - -private: -  Device *TheDevice; -  void *Pointer; -  size_t ElementCount; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_HOSTMEMORY_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h b/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h deleted file mode 100644 index eb023816428..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h +++ /dev/null @@ -1,84 +0,0 @@ -//===-- Kernel.h - StreamExecutor kernel types ------------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Types to represent device kernels (code compiled to run on GPU or other -/// accelerator). -/// -/// See the \ref index "main page" for an example of how a compiler-generated -/// specialization of the Kernel class template can be used along with the -/// streamexecutor::Stream::thenLaunch method to create a typesafe interface for -/// kernel launches. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_KERNEL_H -#define STREAMEXECUTOR_KERNEL_H - -#include "streamexecutor/Error.h" -#include "streamexecutor/KernelSpec.h" - -#include <memory> - -namespace streamexecutor { - -class PlatformDevice; - -/// The base class for all kernel types. -/// -/// Stores the name of the kernel in both mangled and demangled forms. 
class KernelBase {
public:
  // Defined out of line.
  // NOTE(review): presumably Name is the mangled kernel name from which
  // DemangledName is derived -- confirm against the definition.
  KernelBase(PlatformDevice *D, const void *PlatformKernelHandle,
             llvm::StringRef Name);

  // Copying is disabled.
  KernelBase(const KernelBase &Other) = delete;
  KernelBase &operator=(const KernelBase &Other) = delete;

  // Moving is supported; both operations are defined out of line.
  KernelBase(KernelBase &&Other) noexcept;
  KernelBase &operator=(KernelBase &&Other) noexcept;

  // Defined out of line.
  ~KernelBase();

  /// Returns the platform kernel handle this object was constructed with.
  const void *getPlatformHandle() const { return PlatformKernelHandle; }

  /// Returns the stored kernel name.
  const std::string &getName() const { return Name; }

  /// Returns the stored demangled kernel name.
  const std::string &getDemangledName() const { return DemangledName; }

private:
  PlatformDevice *PDevice;
  const void *PlatformKernelHandle;

  std::string Name;
  std::string DemangledName;
};

/// A StreamExecutor kernel.
///
/// The template parameters are the types of the parameters to the kernel
/// function.
template <typename... ParameterTs> class Kernel : public KernelBase {
public:
  /// Forwards all three arguments unchanged to the KernelBase constructor.
  Kernel(PlatformDevice *D, const void *PlatformKernelHandle,
         llvm::StringRef Name)
      : KernelBase(D, PlatformKernelHandle, Name) {}

  // Move operations are declared here and defaulted after the class
  // definition.
  Kernel(Kernel &&Other) noexcept;
  Kernel &operator=(Kernel &&Other) noexcept;
};

// Defaulted move constructor.
template <typename... ParameterTs>
Kernel<ParameterTs...>::Kernel(Kernel<ParameterTs...> &&) noexcept = default;

// Defaulted move assignment operator.
template <typename... ParameterTs>
Kernel<ParameterTs...> &Kernel<ParameterTs...>::
operator=(Kernel<ParameterTs...> &&) noexcept = default;

} // namespace streamexecutor

#endif // STREAMEXECUTOR_KERNEL_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h b/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h deleted file mode 100644 index a6a293001ec..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h +++ /dev/null @@ -1,287 +0,0 @@
//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// KernelLoaderSpec is the base class for types that know where to find the
/// code for a data-parallel kernel in a particular format on a particular
/// platform. So, for example, there will be one subclass that deals with CUDA
/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
/// subclass that deals with OpenCL text code.
///
/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
/// instances. This is useful when code is available for the same kernel in
/// several different formats or targeted for several different platforms. All
/// the various KernelLoaderSpec instances for this kernel can be combined
/// together in one MultiKernelLoaderSpec and the specific platform consumer can
/// decide which instance of the code it wants to use.
///
/// MultiKernelLoaderSpec provides several helper functions to build and
/// register KernelLoaderSpec instances all in a single operation. For example,
/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
-/// -/// The loader spec classes declared here are designed primarily to be -/// instantiated by the compiler, but they can also be instantiated directly by -/// the user. A simplified example workflow which a compiler might follow in the -/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows: -/// -/// 1. The user defines a kernel function called \c UserKernel. -/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds -///    that data into the host code at address \c __UserKernelFatbinAddress. -/// 3. The compiler adds code at the beginning of the host code to instantiate a -///    MultiKernelLoaderSpec: -///    \code -///    namespace compiler_cuda_namespace { -///      MultiKernelLoaderSpec UserKernelLoaderSpec; -///    } // namespace compiler_cuda_namespace -///    \endcode -/// 4. The compiler then adds code to the host code to add the fatbin data to -///    the new MultiKernelLoaderSpec, and to associate that data with the kernel -///    name \c "UserKernel": -///    \code -///    namespace compiler_cuda_namespace { -///      UserKernelLoaderSpec.addCUDAFatbinInMemory( -///        __UserKernelFatbinAddress, "UserKernel"); -///    } // namespace compiler_cuda_namespace -///    \endcode -/// 5. The host code, having known beforehand that the compiler would initialize -///    a MultiKernelLoaderSpec based on the name of the CUDA kernel, makes use -///    of the symbol \c compiler_cuda_namespace::UserKernelLoaderSpec without -///    defining it. -/// -/// In the example above, the MultiKernelLoaderSpec instance created by the -/// compiler can be used by the host code to create StreamExecutor kernel -/// objects. In turn, those StreamExecutor kernel objects can be used by the -/// host code to launch the kernel on the device as desired.
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_KERNELSPEC_H -#define STREAMEXECUTOR_KERNELSPEC_H - -#include <cassert> -#include <functional> -#include <map> -#include <memory> -#include <string> - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" - -namespace streamexecutor { - -/// An object that knows how to find the code for a device kernel. -/// -/// This is the base class for the hierarchy of loader specs. The different -/// subclasses know how to find code in different formats (e.g. CUDA PTX, OpenCL -/// binary). -/// -/// This base class has functionality for storing and getting the name of the -/// kernel as a string. -class KernelLoaderSpec { -public: -  /// Returns the name of the kernel this spec loads. -  const std::string &getKernelName() const { return KernelName; } - -protected: -  explicit KernelLoaderSpec(llvm::StringRef KernelName); - -private: -  std::string KernelName; - -  KernelLoaderSpec(const KernelLoaderSpec &) = delete; -  KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete; -}; - -/// A KernelLoaderSpec for CUDA PTX code that resides in memory as a -/// null-terminated string. -class CUDAPTXInMemorySpec : public KernelLoaderSpec { -public: -  /// First component is major version, second component is minor version. -  using ComputeCapability = std::pair<int, int>; - -  /// PTX code combined with its compute capability. -  struct PTXSpec { -    ComputeCapability TheComputeCapability; -    const char *PTXCode; -  }; - -  /// Creates a CUDAPTXInMemorySpec from an array of PTXSpec objects. -  /// -  /// Adds each item in SpecList to this object. -  /// -  /// Does not take ownership of the PTXCode pointers in the SpecList elements. 
-  CUDAPTXInMemorySpec( -      llvm::StringRef KernelName, -      const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList); - -  /// Returns a pointer to the PTX code for the greatest compute capability not -  /// exceeding the requested compute capability. -  /// -  /// Returns nullptr on failed lookup (if the requested version is not -  /// available and no lower versions are available). -  const char *getCode(int ComputeCapabilityMajor, -                      int ComputeCapabilityMinor) const; - -private: -  /// PTX code contents in memory. -  /// -  /// The key is a pair (cc_major, cc_minor), e.g., (2, 0), (3, 0), (3, 5). -  std::map<ComputeCapability, const char *> PTXByComputeCapability; - -  CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete; -  CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete; -}; - -/// A KernelLoaderSpec for CUDA fatbin code that resides in memory. -class CUDAFatbinInMemorySpec : public KernelLoaderSpec { -public: -  /// Creates a CUDAFatbinInMemorySpec with a reference to the given fatbin -  /// bytes. -  /// -  /// Does not take ownership of the Bytes pointer. -  CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes); - -  /// Gets the fatbin data bytes. -  const void *getBytes() const { return Bytes; } - -private: -  const void *Bytes; - -  CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete; -  CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete; -}; - -/// A KernelLoaderSpec for OpenCL text that resides in memory as a -/// null-terminated string. -class OpenCLTextInMemorySpec : public KernelLoaderSpec { -public: -  /// Creates an OpenCLTextInMemorySpec with a reference to the given OpenCL text -  /// code bytes. -  /// -  /// Does not take ownership of the Text pointer. -  OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text); - -  /// Returns the OpenCL text contents.
-  const char *getText() const { return Text; } - -private: -  const char *Text; - -  OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete; -  OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete; -}; - -/// An object to store several different KernelLoaderSpecs for the same kernel. -/// -/// This allows code in different formats and for different platforms to be -/// stored all together for a single kernel. -/// -/// Various methods are available to add a new KernelLoaderSpec to a -/// MultiKernelLoaderSpec. There are also methods to query which formats and -/// platforms are supported by the currently added KernelLoaderSpec objects, and -/// methods to get the KernelLoaderSpec objects for each format and platform. -/// -/// Since all stored KernelLoaderSpecs are supposed to reference the same -/// kernel, they are all assumed to take the same number and type of parameters, -/// but no checking is done to enforce this. In debug mode, all -/// KernelLoaderSpecs are checked to make sure they have the same kernel name, -/// so passing in specs with different kernel names can cause the program to -/// abort. -/// -/// This interface is prone to errors, so it is better to leave -/// MultiKernelLoaderSpec creation and initialization to the compiler rather -/// than doing it by hand. -class MultiKernelLoaderSpec { -public: -  /// Type of functions used as host platform kernels. -  using HostFunctionTy = std::function<void(const void **)>; - -  std::string getKernelName() const { -    if (TheKernelName) -      return *TheKernelName; -    return ""; -  } - -  // Convenience getters for testing whether these platform variants have -  // kernel loader specifications available. 
- -  bool hasCUDAPTXInMemory() const { return TheCUDAPTXInMemorySpec != nullptr; } -  bool hasCUDAFatbinInMemory() const { -    return TheCUDAFatbinInMemorySpec != nullptr; -  } -  bool hasOpenCLTextInMemory() const { -    return TheOpenCLTextInMemorySpec != nullptr; -  } -  bool hasHostFunction() const { return HostFunction != nullptr; } - -  // Accessors for platform variant kernel load specifications. -  // -  // Precondition: corresponding has* method returns true. - -  const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const { -    assert(hasCUDAPTXInMemory() && "getting spec that is not present"); -    return *TheCUDAPTXInMemorySpec; -  } -  const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const { -    assert(hasCUDAFatbinInMemory() && "getting spec that is not present"); -    return *TheCUDAFatbinInMemorySpec; -  } -  const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const { -    assert(hasOpenCLTextInMemory() && "getting spec that is not present"); -    return *TheOpenCLTextInMemorySpec; -  } - -  const HostFunctionTy &getHostFunction() const { -    assert(hasHostFunction() && "getting spec that is not present"); -    return *HostFunction; -  } - -  // Builder-pattern-like methods for use in initializing a -  // MultiKernelLoaderSpec. -  // -  // Each of these should be used at most once for a single -  // MultiKernelLoaderSpec object. See file comment for example usage. -  // -  // Note that the KernelName parameter must be consistent with the kernel in -  // the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel -  // name may be mangled by the compiler if it is not declared extern "C". - -  /// Does not take ownership of the PTXCode pointers in the SpecList elements. -  MultiKernelLoaderSpec & -  addCUDAPTXInMemory(llvm::StringRef KernelName, -                     llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList); - -  /// Does not take ownership of the FatbinBytes pointer. 
-  MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName, -                                               const void *FatbinBytes); - -  /// Does not take ownership of the OpenCLText pointer. -  MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName, -                                               const char *OpenCLText); - -  MultiKernelLoaderSpec &addHostFunction(llvm::StringRef KernelName, -                                         HostFunctionTy Function) { -    HostFunction = llvm::make_unique<HostFunctionTy>(std::move(Function)); -    return *this; -  } - -private: -  void setKernelName(llvm::StringRef KernelName); - -  std::unique_ptr<std::string> TheKernelName; -  std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec; -  std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec; -  std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec; -  std::unique_ptr<HostFunctionTy> HostFunction; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_KERNELSPEC_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h b/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h deleted file mode 100644 index a88cbfc8cf9..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h +++ /dev/null @@ -1,47 +0,0 @@ -//===-- LaunchDimensions.h - Kernel block and grid sizes --------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Structures to hold sizes for blocks and grids which are used as parameters -/// for kernel launches. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_LAUNCHDIMENSIONS_H -#define STREAMEXECUTOR_LAUNCHDIMENSIONS_H - -namespace streamexecutor { - -/// The dimensions of a device block of execution. -/// -/// A block is made up of an array of X by Y by Z threads. -struct BlockDimensions { -  BlockDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1) -      : X(X), Y(Y), Z(Z) {} - -  unsigned X; -  unsigned Y; -  unsigned Z; -}; - -/// The dimensions of a device grid of execution. -/// -/// A grid is made up of an array of X by Y by Z blocks. -struct GridDimensions { -  GridDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1) -      : X(X), Y(Y), Z(Z) {} - -  unsigned X; -  unsigned Y; -  unsigned Z; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_LAUNCHDIMENSIONS_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h b/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h deleted file mode 100644 index f34ec67089f..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h +++ /dev/null @@ -1,234 +0,0 @@ -//===-- PackedKernelArgumentArray.h - Packed kernel arg types ---*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// The types in this file are designed to deal with the fact that device memory -/// kernel arguments are treated differently from other arguments during kernel -/// argument packing. -/// -/// GlobalDeviceMemory<T> arguments are passed to a kernel by passing their -/// opaque handle. 
SharedDeviceMemory<T> arguments have no associated address, -/// only a size, so the size is the only information that gets passed to the -/// kernel launch. -/// -/// The KernelArgumentType enum is used to keep track of the type of each -/// argument. -/// -/// The PackedKernelArgumentArray class uses template metaprogramming to convert -/// each argument to a PackedKernelArgument with minimal runtime overhead. -/// -/// The design of the PackedKernelArgumentArray class has a few idiosyncrasies -/// due to the fact that parameter packing has been identified as -/// performance-critical in some applications. The packed argument data is -/// stored as a struct of arrays rather than an array of structs because CUDA -/// kernel launches in the CUDA driver API take an array of argument addresses. -/// Having created the array of argument addresses here, no further work will -/// need to be done in the CUDA driver layer to unpack and repack the addresses. -/// -/// The shared memory argument count is maintained separately because in the -/// common case where it is zero, the CUDA layer doesn't have to loop through -/// the argument array and sum up all the shared memory sizes. This is another -/// performance optimization that shows up as a quirk in this class interface. -/// -/// The platform-interface kernel launch function will take the following -/// arguments, which are provided by this interface: -///   * argument count, -///   * array of argument addresses, -///   * array of argument sizes, -///   * array of argument types, and -///   * shared memory argument count. -/// This information should be enough to allow any platform to launch the kernel -/// efficiently, although it is probably more information than is needed for any -/// specific platform. -/// -/// The PackedKernelArgumentArrayBase class has no template parameters, so it -/// does not benefit from compile-time type checking.
However, since it has no -/// template parameters, it can be passed as an argument to virtual functions, -/// and this allows it to be passed to functions that use virtual function -/// overriding to handle platform-specific kernel launching. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H -#define STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H - -#include <array> - -#include "streamexecutor/DeviceMemory.h" - -namespace streamexecutor { - -enum class KernelArgumentType { -  VALUE,                ///< Non-device-memory argument. -  GLOBAL_DEVICE_MEMORY, ///< Non-shared device memory argument. -  SHARED_DEVICE_MEMORY  ///< Shared device memory argument. -}; - -/// An array of packed kernel arguments without compile-time type information. -/// -/// This un-templated base class is useful because packed kernel arguments must -/// at some point be passed to a virtual function that performs -/// platform-specific kernel launches. Such a virtual function cannot be -/// templated to handle all specializations of the -/// PackedKernelArgumentArray<...> class template, so, instead, references to -/// PackedKernelArgumentArray<...> are passed as references to this base class. -class PackedKernelArgumentArrayBase { -public: -  virtual ~PackedKernelArgumentArrayBase(); - -  /// Gets the number of packed arguments. -  size_t getArgumentCount() const { return ArgumentCount; } - -  /// Gets the address of the argument at the given index. -  const void *getAddress(size_t Index) const { return AddressesData[Index]; } - -  /// Gets the size of the argument at the given index. -  size_t getSize(size_t Index) const { return SizesData[Index]; } - -  /// Gets the type of the argument at the given index. -  KernelArgumentType getType(size_t Index) const { return TypesData[Index]; } - -  /// Gets a pointer to the address array.
-  const void *const *getAddresses() const { return AddressesData; } - -  /// Gets a pointer to the sizes array. -  const size_t *getSizes() const { return SizesData; } - -  /// Gets a pointer to the types array. -  const KernelArgumentType *getTypes() const { return TypesData; } - -  /// Gets the number of shared device memory arguments. -  size_t getSharedCount() const { return SharedCount; } - -protected: -  PackedKernelArgumentArrayBase(size_t ArgumentCount) -      : ArgumentCount(ArgumentCount), SharedCount(0u) {} - -  size_t ArgumentCount; -  size_t SharedCount; -  const void *const *AddressesData; -  size_t *SizesData; -  KernelArgumentType *TypesData; -}; - -/// An array of packed kernel arguments with compile-time type information. -/// -/// This is used by the platform-independent StreamExecutor code to pack -/// arguments in a compile-time type-safe way. In order to actually launch a -/// kernel on a specific platform, however, a reference to this class will have -/// to be passed to a virtual, platform-specific kernel launch function. Such a -/// reference will be passed as a reference to the base class rather than a -/// reference to this subclass itself because a virtual function cannot be -/// templated in such a way to maintain the template parameter types of the -/// subclass. -template <typename... ParameterTs> -class PackedKernelArgumentArray : public PackedKernelArgumentArrayBase { -public: -  /// Constructs an instance by packing the specified arguments. -  /// -  /// Rather than using this constructor directly, consider using the -  /// make_kernel_argument_pack function instead, to get the compiler to infer -  /// the parameter types for you. -  PackedKernelArgumentArray(const ParameterTs &... 
Arguments) -      : PackedKernelArgumentArrayBase(sizeof...(ParameterTs)) { -    AddressesData = Addresses.data(); -    SizesData = Sizes.data(); -    TypesData = Types.data(); -    PackArguments(0, Arguments...); -  } - -  ~PackedKernelArgumentArray() override = default; - -private: -  // Base case for PackArguments when there are no arguments to pack. -  void PackArguments(size_t) {} - -  // Induction step for PackArguments. -  template <typename T, typename... RemainingParameterTs> -  void PackArguments(size_t Index, const T &Argument, -                     const RemainingParameterTs &... RemainingArguments) { -    PackOneArgument(Index, Argument); -    PackArguments(Index + 1, RemainingArguments...); -  } - -  // Pack a normal, non-device-memory argument. -  template <typename T> void PackOneArgument(size_t Index, const T &Argument) { -    Addresses[Index] = &Argument; -    Sizes[Index] = sizeof(T); -    Types[Index] = KernelArgumentType::VALUE; -  } - -  // Pack a GlobalDeviceMemory<T> argument. -  template <typename T> -  void PackOneArgument(size_t Index, const GlobalDeviceMemory<T> &Argument) { -    Addresses[Index] = Argument.getHandleAddress(); -    Sizes[Index] = sizeof(void *); -    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY; -  } - -  // Pack a GlobalDeviceMemory<T> pointer argument. -  template <typename T> -  void PackOneArgument(size_t Index, GlobalDeviceMemory<T> *Argument) { -    Addresses[Index] = Argument->getHandleAddress(); -    Sizes[Index] = sizeof(void *); -    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY; -  } - -  // Pack a const GlobalDeviceMemory<T> pointer argument. -  template <typename T> -  void PackOneArgument(size_t Index, const GlobalDeviceMemory<T> *Argument) { -    Addresses[Index] = Argument->getHandleAddress(); -    Sizes[Index] = sizeof(void *); -    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY; -  } - -  // Pack a SharedDeviceMemory argument. 
-  template <typename T> -  void PackOneArgument(size_t Index, const SharedDeviceMemory<T> &Argument) { -    ++SharedCount; -    Addresses[Index] = nullptr; -    Sizes[Index] = Argument.getElementCount() * sizeof(T); -    Types[Index] = KernelArgumentType::SHARED_DEVICE_MEMORY; -  } - -  // Pack a SharedDeviceMemory pointer argument. -  template <typename T> -  void PackOneArgument(size_t Index, SharedDeviceMemory<T> *Argument) { -    ++SharedCount; -    Addresses[Index] = nullptr; -    Sizes[Index] = Argument->getElementCount() * sizeof(T); -    Types[Index] = KernelArgumentType::SHARED_DEVICE_MEMORY; -  } - -  // Pack a const SharedDeviceMemory pointer argument. -  template <typename T> -  void PackOneArgument(size_t Index, const SharedDeviceMemory<T> *Argument) { -    ++SharedCount; -    Addresses[Index] = nullptr; -    Sizes[Index] = Argument->getElementCount() * sizeof(T); -    Types[Index] = KernelArgumentType::SHARED_DEVICE_MEMORY; -  } - -  std::array<const void *, sizeof...(ParameterTs)> Addresses; -  std::array<size_t, sizeof...(ParameterTs)> Sizes; -  std::array<KernelArgumentType, sizeof...(ParameterTs)> Types; -}; - -// Utility template function to call the PackedKernelArgumentArray constructor -// with the template arguments matching the types of the arguments passed to -// this function. -template <typename... ParameterTs> -PackedKernelArgumentArray<ParameterTs...> -make_kernel_argument_pack(const ParameterTs &... 
Arguments) { -  return PackedKernelArgumentArray<ParameterTs...>(Arguments...); -} - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Platform.h b/parallel-libs/streamexecutor/include/streamexecutor/Platform.h deleted file mode 100644 index 8ced35d2066..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/Platform.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- Platform.h - The Platform class -------------------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// The Platform class which represents a platform such as CUDA or OpenCL. -/// -/// This is an abstract base class that will be overridden by each specific -/// platform. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORM_H -#define STREAMEXECUTOR_PLATFORM_H - -#include "streamexecutor/Error.h" - -namespace streamexecutor { - -class Device; - -class Platform { -public: -  virtual ~Platform(); - -  /// Gets the number of devices available for this platform. -  virtual size_t getDeviceCount() const = 0; - -  /// Gets a Device with the given index for this platform. 
-  virtual Expected<Device> getDevice(size_t DeviceIndex) = 0; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORM_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h deleted file mode 100644 index 5b10e7067b1..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h +++ /dev/null @@ -1,171 +0,0 @@ -//===-- PlatformDevice.h - PlatformDevice class -----------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Declaration of the PlatformDevice class. -/// -/// Each specific platform such as CUDA or OpenCL must subclass PlatformDevice -/// and override streamexecutor::Platform::getDevice to return an instance of -/// their PlatformDevice subclass. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORMDEVICE_H -#define STREAMEXECUTOR_PLATFORMDEVICE_H - -#include "streamexecutor/DeviceMemory.h" -#include "streamexecutor/Error.h" -#include "streamexecutor/Kernel.h" -#include "streamexecutor/LaunchDimensions.h" -#include "streamexecutor/PackedKernelArgumentArray.h" - -namespace streamexecutor { - -/// Raw executor methods that must be implemented by each platform. -/// -/// The public Device and Stream classes have the type-safe versions of the -/// functions in this interface. -class PlatformDevice { -public: -  virtual ~PlatformDevice(); - -  virtual std::string getName() const = 0; - -  virtual std::string getPlatformName() const = 0; - -  /// Creates a platform-specific kernel. 
-  virtual Expected<const void *> -  createKernel(const MultiKernelLoaderSpec &Spec) { -    return make_error("createKernel not implemented for platform " + -                      getPlatformName()); -  } - -  virtual Error destroyKernel(const void *Handle) { -    return make_error("destroyKernel not implemented for platform " + -                      getPlatformName()); -  } - -  /// Creates a platform-specific stream. -  virtual Expected<const void *> createStream() { -    return make_error("createStream not implemented for platform " + -                      getPlatformName()); -  } - -  virtual Error destroyStream(const void *Handle) { -    return make_error("destroyStream not implemented for platform " + -                      getPlatformName()); -  } - -  /// Launches a kernel on the given stream. -  virtual Error launch(const void *PlatformStreamHandle, -                       BlockDimensions BlockSize, GridDimensions GridSize, -                       const void *PKernelHandle, -                       const PackedKernelArgumentArrayBase &ArgumentArray) { -    return make_error("launch not implemented for platform " + -                      getPlatformName()); -  } - -  /// Copies data from the device to the host. -  /// -  /// HostDst should have been registered with registerHostMemory. -  virtual Error copyD2H(const void *PlatformStreamHandle, -                        const void *DeviceSrcHandle, size_t SrcByteOffset, -                        void *HostDst, size_t DstByteOffset, size_t ByteCount) { -    return make_error("copyD2H not implemented for platform " + -                      getPlatformName()); -  } - -  /// Copies data from the host to the device. -  /// -  /// HostSrc should have been registered with registerHostMemory. 
-  virtual Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc, -                        size_t SrcByteOffset, const void *DeviceDstHandle, -                        size_t DstByteOffset, size_t ByteCount) { -    return make_error("copyH2D not implemented for platform " + -                      getPlatformName()); -  } - -  /// Copies data from one device location to another. -  virtual Error copyD2D(const void *PlatformStreamHandle, -                        const void *DeviceSrcHandle, size_t SrcByteOffset, -                        const void *DeviceDstHandle, size_t DstByteOffset, -                        size_t ByteCount) { -    return make_error("copyD2D not implemented for platform " + -                      getPlatformName()); -  } - -  /// Blocks the host until the given stream completes all the work enqueued up -  /// to the point this function is called. -  virtual Error blockHostUntilDone(const void *PlatformStreamHandle) { -    return make_error("blockHostUntilDone not implemented for platform " + -                      getPlatformName()); -  } - -  /// Allocates untyped device memory of a given size in bytes. -  virtual Expected<void *> allocateDeviceMemory(size_t ByteCount) { -    return make_error("allocateDeviceMemory not implemented for platform " + -                      getPlatformName()); -  } - -  /// Frees device memory previously allocated by allocateDeviceMemory. -  virtual Error freeDeviceMemory(const void *Handle) { -    return make_error("freeDeviceMemory not implemented for platform " + -                      getPlatformName()); -  } - -  /// Registers previously allocated host memory so it can be used with copyH2D -  /// and copyD2H. -  virtual Error registerHostMemory(void *Memory, size_t ByteCount) { -    return make_error("registerHostMemory not implemented for platform " + -                      getPlatformName()); -  } - -  /// Unregisters host memory previously registered with registerHostMemory. 
-  virtual Error unregisterHostMemory(const void *Memory) { -    return make_error("unregisterHostMemory not implemented for platform " + -                      getPlatformName()); -  } - -  /// Copies the given number of bytes from device memory to host memory. -  /// -  /// Blocks the calling host thread until the copy is completed. Can operate on -  /// any host memory, not just registered host memory. Does not block any -  /// ongoing device calls. -  virtual Error synchronousCopyD2H(const void *DeviceSrcHandle, -                                   size_t SrcByteOffset, void *HostDst, -                                   size_t DstByteOffset, size_t ByteCount) { -    return make_error("synchronousCopyD2H not implemented for platform " + -                      getPlatformName()); -  } - -  /// Similar to synchronousCopyD2H(const void *, size_t, void -  /// *, size_t, size_t), but copies memory from host to device rather than -  /// device to host. -  virtual Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, -                                   const void *DeviceDstHandle, -                                   size_t DstByteOffset, size_t ByteCount) { -    return make_error("synchronousCopyH2D not implemented for platform " + -                      getPlatformName()); -  } - -  /// Similar to synchronousCopyD2H(const void *, size_t, void -  /// *, size_t, size_t), but copies memory from one location in device memory -  /// to another rather than from device to host. 
-  virtual Error synchronousCopyD2D(const void *DeviceSrcHandle, -                                   size_t SrcByteOffset, -                                   const void *DeviceDstHandle, -                                   size_t DstByteOffset, size_t ByteCount) { -    return make_error("synchronousCopyD2D not implemented for platform " + -                      getPlatformName()); -  } -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORMDEVICE_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformManager.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformManager.h deleted file mode 100644 index 7d0de12528d..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformManager.h +++ /dev/null @@ -1,53 +0,0 @@ -//===-- PlatformManager.h - The PlatformManager class -----------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// PlatformManager is the entry point into the StreamExecutor API. A user -/// begins by calling PlatformManager::getPlatformByName("cuda") where "cuda" -/// can be replaced by any supported platform name. This gives the user a -/// Platform object that can be used to create Device objects for that platform, -/// etc. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORMMANAGER_H -#define STREAMEXECUTOR_PLATFORMMANAGER_H - -#include <map> - -#include "streamexecutor/Error.h" -#include "streamexecutor/Platform.h" - -namespace streamexecutor { - -/// A singleton that holds a reference to a Platform object for each -/// supported StreamExecutor platform. -class PlatformManager { -public: -  /// Gets a reference to the Platform with the given name.
-  /// -  /// The name parameter is not case-sensitive, so the following arguments are -  /// all equivalent: "cuda", "CUDA", "Cuda", "cUdA". -  /// -  /// Returns an error if no platform is present for the name. -  /// -  /// Ownership of the platform is NOT transferred to the caller. -  static Expected<Platform *> getPlatformByName(llvm::StringRef Name); - -private: -  PlatformManager(); -  PlatformManager(const PlatformManager &) = delete; -  PlatformManager operator=(const PlatformManager &) = delete; - -  std::map<std::string, std::unique_ptr<Platform>> PlatformsByName; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORMMANAGER_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformOptions.h.in b/parallel-libs/streamexecutor/include/streamexecutor/PlatformOptions.h.in deleted file mode 100644 index 2934dd428ea..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformOptions.h.in +++ /dev/null @@ -1,23 +0,0 @@ -//===-- PlatformOptions.h - Platform option macros --------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This contents of this file are filled in at configuration time. This file -/// defines macros that represent the platform configuration state of the build, -/// e.g. which platforms are enabled. 
-/// -//===----------------------------------------------------------------------===// - - -#ifndef STREAMEXECUTOR_PLATFORMOPTIONS_H -#define STREAMEXECUTOR_PLATFORMOPTIONS_H - -#cmakedefine STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM - -#endif // STREAMEXECUTOR_PLATFORMOPTIONS_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h b/parallel-libs/streamexecutor/include/streamexecutor/Stream.h deleted file mode 100644 index bdff7ff9701..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h +++ /dev/null @@ -1,313 +0,0 @@ -//===-- Stream.h - A stream of execution ------------------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// -/// A Stream instance represents a queue of sequential, host-asynchronous work -/// to be performed on a device. -/// -/// To enqueue work on a device, first create a Device instance then use that -/// Device to create a Stream instance. The Stream instance will perform its -/// work on the device managed by the Device object that created it. -/// -/// The various "then" methods of the Stream object, such as thenCopyH2D and -/// thenLaunch, may be used to enqueue work on the Stream, and the -/// blockHostUntilDone() method may be used to block the host code until the -/// Stream has completed all its work. -/// -/// Multiple Stream instances can be created for the same Device. This allows -/// several independent streams of computation to be performed simultaneously on -/// a single device. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_STREAM_H -#define STREAMEXECUTOR_STREAM_H - -#include <cassert> -#include <memory> -#include <string> -#include <type_traits> - -#include "streamexecutor/DeviceMemory.h" -#include "streamexecutor/Error.h" -#include "streamexecutor/HostMemory.h" -#include "streamexecutor/Kernel.h" -#include "streamexecutor/LaunchDimensions.h" -#include "streamexecutor/PackedKernelArgumentArray.h" -#include "streamexecutor/PlatformDevice.h" - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/RWMutex.h" - -namespace streamexecutor { - -/// Represents a stream of dependent computations on a device. -/// -/// The operations within a stream execute sequentially and asynchronously until -/// blockHostUntilDone() is invoked, which synchronously joins host code with -/// the execution of the stream. -/// -/// If any given operation fails when entraining work for the stream, isOK() -/// will indicate that an error has occurred and getStatus() will get the first -/// error that occurred on the stream. There is no way to clear the error state -/// of a stream once it is in an error state. -class Stream { -public: -  Stream(PlatformDevice *D, const void *PlatformStreamHandle); - -  Stream(const Stream &Other) = delete; -  Stream &operator=(const Stream &Other) = delete; - -  Stream(Stream &&Other) noexcept; -  Stream &operator=(Stream &&Other) noexcept; - -  ~Stream(); - -  /// Returns whether any error has occurred while entraining work on this -  /// stream. -  bool isOK() const { -    llvm::sys::ScopedReader ReaderLock(*ErrorMessageMutex); -    return !ErrorMessage; -  } - -  /// Returns the status created by the first error that occurred while -  /// entraining work on this stream. 
-  Error getStatus() const { -    llvm::sys::ScopedReader ReaderLock(*ErrorMessageMutex); -    if (ErrorMessage) -      return make_error(*ErrorMessage); -    else -      return Error::success(); -  } - -  /// Blocks the calling host thread until all work enqueued on this Stream -  /// completes. -  /// -  /// Any error reported by the platform while waiting is passed to setError() -  /// before the status is read. -  /// -  /// Returns the result of getStatus() after the Stream work completes. -  Error blockHostUntilDone() { -    setError(PDevice->blockHostUntilDone(PlatformStreamHandle)); -    return getStatus(); -  } - -  /// Entrains onto the stream of operations a kernel launch with the given -  /// arguments. -  /// -  /// These arguments can be device memory types like GlobalDeviceMemory<T> and -  /// SharedDeviceMemory<T>, or they can be primitive types such as int. The -  /// allowable argument types are determined by the template parameters to the -  /// Kernel argument. -  template <typename... ParameterTs> -  Stream &thenLaunch(BlockDimensions BlockSize, GridDimensions GridSize, -                     const Kernel<ParameterTs...> &K, -                     const ParameterTs &... Arguments) { -    auto ArgumentArray = -        make_kernel_argument_pack<ParameterTs...>(Arguments...); -    setError(PDevice->launch(PlatformStreamHandle, BlockSize, GridSize, -                             K.getPlatformHandle(), ArgumentArray)); -    return *this; -  } - -  /// \name Device memory copying functions -  /// -  /// These methods enqueue a device memory copy operation on the stream and -  /// return without waiting for the operation to complete. -  /// -  /// The arguments and bounds checking for these methods match the API of the -  /// \ref DeviceHostSyncCopyGroup -  /// "host-synchronous device memory copying functions" of Device. 
-  /// -  /// The template types SrcTy and DstTy must match the following constraints: -  ///   * Must define typename ElementTy (the type of element stored in the -  ///   memory); -  ///   * ElementTy for the source argument must be the same as ElementTy for -  ///     the destination argument; -  ///   * Must be convertible to the correct slice type: -  ///     * GlobalDeviceMemorySlice<ElementTy> for device memory arguments, -  ///     * RegisteredHostMemorySlice<ElementTy> for host memory source -  ///       arguments, -  ///     * MutableRegisteredHostMemorySlice<ElementT> for host memory -  ///       destination arguments. -  ///@{ - -  // D2H - -  template <typename SrcTy, typename DstTy> -  Stream &thenCopyD2H(SrcTy &&Src, DstTy &&Dst, size_t ElementCount) { -    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy; -    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy; -    static_assert(std::is_same<SrcElemTy, DstElemTy>::value, -                  "src/dst element type mismatch for thenCopyD2H"); -    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src); -    MutableRegisteredHostMemorySlice<DstElemTy> DstSlice(Dst); -    if (ElementCount > Src.getElementCount()) -      setError("copying too many elements, " + llvm::Twine(ElementCount) + -               ", from a device array of element count " + -               llvm::Twine(SrcSlice.getElementCount())); -    else if (ElementCount > DstSlice.getElementCount()) -      setError("copying too many elements, " + llvm::Twine(ElementCount) + -               ", to a host array of element count " + -               llvm::Twine(DstSlice.getElementCount())); -    else -      setError(PDevice->copyD2H( -          PlatformStreamHandle, SrcSlice.getBaseMemory().getHandle(), -          SrcSlice.getElementOffset() * sizeof(SrcElemTy), -          DstSlice.getPointer(), 0, ElementCount * sizeof(DstElemTy))); -    return *this; -  } - -  template <typename SrcTy, typename DstTy> -  
Stream &thenCopyD2H(SrcTy &&Src, DstTy &&Dst) { -    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy; -    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy; -    static_assert(std::is_same<SrcElemTy, DstElemTy>::value, -                  "src/dst element type mismatch for thenCopyD2H"); -    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src); -    MutableRegisteredHostMemorySlice<DstElemTy> DstSlice(Dst); -    if (SrcSlice.getElementCount() != DstSlice.getElementCount()) -      setError("array size mismatch for D2H, device source has element count " + -               llvm::Twine(SrcSlice.getElementCount()) + -               " but host destination has element count " + -               llvm::Twine(DstSlice.getElementCount())); -    else -      thenCopyD2H(SrcSlice, DstSlice, SrcSlice.getElementCount()); -    return *this; -  } - -  // H2D - -  template <typename SrcTy, typename DstTy> -  Stream &thenCopyH2D(SrcTy &&Src, DstTy &&Dst, size_t ElementCount) { -    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy; -    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy; -    static_assert(std::is_same<SrcElemTy, DstElemTy>::value, -                  "src/dst element type mismatch for thenCopyH2D"); -    RegisteredHostMemorySlice<SrcElemTy> SrcSlice(Src); -    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst); -    if (ElementCount > SrcSlice.getElementCount()) -      setError("copying too many elements, " + llvm::Twine(ElementCount) + -               ", from a host array of element count " + -               llvm::Twine(SrcSlice.getElementCount())); -    else if (ElementCount > DstSlice.getElementCount()) -      setError("copying too many elements, " + llvm::Twine(ElementCount) + -               ", to a device array of element count " + -               llvm::Twine(DstSlice.getElementCount())); -    else -      setError(PDevice->copyH2D(PlatformStreamHandle, SrcSlice.getPointer(), 0, 
-                                DstSlice.getBaseMemory().getHandle(), -                                DstSlice.getElementOffset() * sizeof(DstElemTy), -                                ElementCount * sizeof(SrcElemTy))); -    return *this; -  } - -  template <typename SrcTy, typename DstTy> -  Stream &thenCopyH2D(SrcTy &&Src, DstTy &&Dst) { -    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy; -    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy; -    static_assert(std::is_same<SrcElemTy, DstElemTy>::value, -                  "src/dst element type mismatch for thenCopyH2D"); -    RegisteredHostMemorySlice<SrcElemTy> SrcSlice(Src); -    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst); -    if (SrcSlice.getElementCount() != DstSlice.getElementCount()) -      setError("array size mismatch for H2D, host source has element count " + -               llvm::Twine(SrcSlice.getElementCount()) + -               " but device destination has element count " + -               llvm::Twine(DstSlice.getElementCount())); -    else -      thenCopyH2D(SrcSlice, DstSlice, DstSlice.getElementCount()); -    return *this; -  } - -  // D2D - -  template <typename SrcTy, typename DstTy> -  Stream &thenCopyD2D(SrcTy &&Src, DstTy &&Dst, size_t ElementCount) { -    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy; -    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy; -    static_assert(std::is_same<SrcElemTy, DstElemTy>::value, -                  "src/dst element type mismatch for thenCopyD2D"); -    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src); -    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst); -    if (ElementCount > SrcSlice.getElementCount()) -      setError("copying too many elements, " + llvm::Twine(ElementCount) + -               ", from a device array of element count " + -               llvm::Twine(SrcSlice.getElementCount())); -    else if (ElementCount > 
DstSlice.getElementCount()) -      setError("copying too many elements, " + llvm::Twine(ElementCount) + -               ", to a device array of element count " + -               llvm::Twine(DstSlice.getElementCount())); -    else -      setError(PDevice->copyD2D(PlatformStreamHandle, -                                SrcSlice.getBaseMemory().getHandle(), -                                SrcSlice.getElementOffset() * sizeof(SrcElemTy), -                                DstSlice.getBaseMemory().getHandle(), -                                DstSlice.getElementOffset() * sizeof(DstElemTy), -                                ElementCount * sizeof(SrcElemTy))); -    return *this; -  } - -  template <typename SrcTy, typename DstTy> -  Stream &thenCopyD2D(SrcTy &&Src, DstTy &&Dst) { -    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy; -    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy; -    static_assert(std::is_same<SrcElemTy, DstElemTy>::value, -                  "src/dst element type mismatch for thenCopyD2D"); -    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src); -    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst); -    if (SrcSlice.getElementCount() != DstSlice.getElementCount()) -      setError("array size mismatch for D2D, device source has element count " + -               llvm::Twine(SrcSlice.getElementCount()) + -               " but device destination has element count " + -               llvm::Twine(DstSlice.getElementCount())); -    else -      thenCopyD2D(SrcSlice, DstSlice, SrcSlice.getElementCount()); -    return *this; -  } - -  ///@} End device memory copying functions - -private: -  /// Sets the error state from an Error object. -  /// -  /// Does not overwrite the error if it is already set. 
-  void setError(Error &&E) { -    if (E) { -      llvm::sys::ScopedWriter WriterLock(*ErrorMessageMutex); -      if (!ErrorMessage) -        ErrorMessage = consumeAndGetMessage(std::move(E)); -    } -  } - -  /// Sets the error state from an error message. -  /// -  /// Does not overwrite the error if it is already set. -  void setError(const llvm::Twine &Message) { -    llvm::sys::ScopedWriter WriterLock(*ErrorMessageMutex); -    if (!ErrorMessage) -      ErrorMessage = Message.str(); -  } - -  /// The PlatformDevice that supports the operations of this stream. -  PlatformDevice *PDevice; - -  /// The platform-specific stream handle for this instance. -  const void *PlatformStreamHandle; - -  /// Mutex that guards the error state flags. -  std::unique_ptr<llvm::sys::RWMutex> ErrorMessageMutex; - -  /// First error message for an operation in this stream or empty if there have -  /// been no errors. -  llvm::Optional<std::string> ErrorMessage; -}; - -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_STREAM_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/StreamExecutor.h b/parallel-libs/streamexecutor/include/streamexecutor/StreamExecutor.h deleted file mode 100644 index 942cd325f85..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/StreamExecutor.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- StreamExecutor.h - Main include file for StreamExecutor -*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -/// \mainpage Welcome to StreamExecutor -/// -/// \section Introduction -/// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming -/// models (runtimes). 
This abstraction cleanly permits host code to target -/// either CUDA or OpenCL devices with identically-functioning data parallel -/// kernels. It manages the execution of concurrent work targeting the -/// accelerator, similar to a host-side Executor. -/// -/// This version of StreamExecutor can be built either as a sub-project of the -/// LLVM project or as a standalone project depending on LLVM as an external -/// package. -/// -/// \subsection ExampleUsage Example Usage -/// Below is an example of the use of the StreamExecutor API: -/// -/// \snippet examples/CUDASaxpy.cpp Example saxpy host main -/// -/// In the example, a couple of handler functions, \c getOrDie and \c -/// dieIfError, are used to handle error return values in the StreamExecutor -/// API. These functions are provided by StreamExecutor for quick-and-dirty -/// error handling, but real applications will likely want to define their own -/// versions of these handlers so that errors are handled more gracefully than -/// just exiting the program. -/// -/// \subsection CompilerGeneratedCode Compiler-Generated Code -/// -/// The example also references some symbols from a compiler-generated -/// namespace: -/// -/// \snippet examples/CUDASaxpy.cpp Example saxpy compiler-generated -/// -/// Instead of depending on the compiler to generate this code, you can -/// technically write the code yourself, but this is not recommended because the -/// code is very error-prone. For example, the template parameters for the -/// Kernel specialization have to match the parameter types for the device -/// kernel, and the MultiKernelLoaderSpec has to be initialized with valid -/// device code for the kernel. Errors in this code will not show up until -/// runtime, and may only show up as garbage output rather than an explicit -/// error, which can be very hard to debug, so again, it is strongly advised not -/// to write this code yourself. 
-/// -/// The example compiler-generated code uses a PTX string in the source code to -/// store the device code, but the device code can also be stored in other -/// formats such as CUDA cubin and CUDA fatbin. Furthermore, device code can be -/// stored for other platforms such as OpenCL, and StreamExecutor will pick the -/// right device code at runtime based on the user's platform selection. See -/// streamexecutor::MultiKernelLoaderSpec for details of how device code can be -/// stored for different platforms, but again, the code to set up the -/// MultiKernelLoaderSpec instance should be generated by the compiler if -/// possible, not by the user. - -/// \example examples/CUDASaxpy.cpp -/// Running saxpy on a CUDA device. - -#ifndef STREAMEXECUTOR_STREAMEXECUTOR_H -#define STREAMEXECUTOR_STREAMEXECUTOR_H - -#include "Device.h" -#include "Kernel.h" -#include "KernelSpec.h" -#include "Platform.h" -#include "PlatformManager.h" -#include "Stream.h" - -#endif // STREAMEXECUTOR_STREAMEXECUTOR_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h deleted file mode 100644 index cbcd29af819..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h +++ /dev/null @@ -1,42 +0,0 @@ -//===-- CUDAPlatform.h - CUDA platform subclass -----------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Declaration of the CUDAPlatform class. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H -#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H - -#include "streamexecutor/Platform.h" -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" - -#include "llvm/Support/Mutex.h" - -#include <map> - -namespace streamexecutor { -namespace cuda { - -class CUDAPlatform : public Platform { -public: -  size_t getDeviceCount() const override; - -  Expected<Device> getDevice(size_t DeviceIndex) override; - -private: -  llvm::sys::Mutex Mutex; -  std::map<size_t, CUDAPlatformDevice> PlatformDevices; -}; - -} // namespace cuda -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h deleted file mode 100644 index b7c32985136..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h +++ /dev/null @@ -1,93 +0,0 @@ -//===-- CUDAPlatformDevice.h - CUDAPlatformDevice class ---------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Declaration of the CUDAPlatformDevice class. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H -#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H - -#include "streamexecutor/PlatformDevice.h" - -namespace streamexecutor { -namespace cuda { - -Error CUresultToError(int CUResult, const llvm::Twine &Message); - -class CUDAPlatformDevice : public PlatformDevice { -public: -  static Expected<CUDAPlatformDevice> create(size_t DeviceIndex); - -  CUDAPlatformDevice(const CUDAPlatformDevice &) = delete; -  CUDAPlatformDevice &operator=(const CUDAPlatformDevice &) = delete; - -  CUDAPlatformDevice(CUDAPlatformDevice &&) noexcept; -  CUDAPlatformDevice &operator=(CUDAPlatformDevice &&) noexcept; - -  ~CUDAPlatformDevice() override; - -  std::string getName() const override; - -  std::string getPlatformName() const override { return "CUDA"; } - -  Expected<const void *> -  createKernel(const MultiKernelLoaderSpec &Spec) override; -  Error destroyKernel(const void *Handle) override; - -  Expected<const void *> createStream() override; -  Error destroyStream(const void *Handle) override; - -  Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize, -               GridDimensions GridSize, const void *PKernelHandle, -               const PackedKernelArgumentArrayBase &ArgumentArray) override; - -  Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle, -                size_t SrcByteOffset, void *HostDst, size_t DstByteOffset, -                size_t ByteCount) override; - -  Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc, -                size_t SrcByteOffset, const void *DeviceDstHandle, -                size_t DstByteOffset, size_t ByteCount) override; - -  Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle, -                size_t SrcByteOffset, const void *DeviceDstHandle, -                size_t DstByteOffset, size_t ByteCount) 
override; - -  Error blockHostUntilDone(const void *PlatformStreamHandle) override; - -  Expected<void *> allocateDeviceMemory(size_t ByteCount) override; -  Error freeDeviceMemory(const void *Handle) override; - -  Error registerHostMemory(void *Memory, size_t ByteCount) override; -  Error unregisterHostMemory(const void *Memory) override; - -  Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset, -                           void *HostDst, size_t DstByteOffset, -                           size_t ByteCount) override; - -  Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, -                           const void *DeviceDstHandle, size_t DstByteOffset, -                           size_t ByteCount) override; - -  Error synchronousCopyD2D(const void *DeviceDstHandle, size_t DstByteOffset, -                           const void *DeviceSrcHandle, size_t SrcByteOffset, -                           size_t ByteCount) override; - -private: -  CUDAPlatformDevice(size_t DeviceIndex) : DeviceIndex(DeviceIndex) {} - -  int DeviceIndex; -}; - -} // namespace cuda -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h deleted file mode 100644 index 338e3f6265a..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h +++ /dev/null @@ -1,53 +0,0 @@ -//===-- HostPlatform.h - Host platform subclass -----------------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Declaration of the HostPlatform class. 
-/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H -#define STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H - -#include "HostPlatformDevice.h" -#include "streamexecutor/Device.h" -#include "streamexecutor/Platform.h" - -#include "llvm/Support/Mutex.h" - -namespace streamexecutor { -namespace host { - -/// Platform that performs work on the host rather than offloading to an -/// accelerator. -class HostPlatform : public Platform { -public: -  size_t getDeviceCount() const override { return 1; } - -  Expected<Device> getDevice(size_t DeviceIndex) override { -    if (DeviceIndex != 0) { -      return make_error( -          "Requested device index " + llvm::Twine(DeviceIndex) + -          " from host platform which only supports device index 0"); -    } -    llvm::sys::ScopedLock Lock(Mutex); -    if (!ThePlatformDevice) -      ThePlatformDevice = llvm::make_unique<HostPlatformDevice>(); -    return Device(ThePlatformDevice.get()); -  } - -private: -  llvm::sys::Mutex Mutex; -  std::unique_ptr<HostPlatformDevice> ThePlatformDevice; -}; - -} // namespace host -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h deleted file mode 100644 index d6655756450..00000000000 --- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h +++ /dev/null @@ -1,161 +0,0 @@ -//===-- HostPlatformDevice.h - HostPlatformDevice class ---------*- C++ -*-===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Declaration of the HostPlatformDevice class. -/// -//===----------------------------------------------------------------------===// - -#ifndef STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H -#define STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H - -#include <cstdlib> -#include <cstring> - -#include "streamexecutor/PlatformDevice.h" - -namespace streamexecutor { -namespace host { - -/// A concrete PlatformDevice subclass that performs its work on the host rather -/// than offloading to an accelerator. -class HostPlatformDevice : public PlatformDevice { -public: -  std::string getName() const override { return "host"; } - -  std::string getPlatformName() const override { return "host"; } - -  Expected<const void *> -  createKernel(const MultiKernelLoaderSpec &Spec) override { -    if (!Spec.hasHostFunction()) { -      return make_error("no host implementation available for kernel " + -                        Spec.getKernelName()); -    } -    return static_cast<const void *>(&Spec.getHostFunction()); -  } - -  Error destroyKernel(const void *Handle) override { return Error::success(); } - -  Expected<const void *> createStream() override { -    // TODO(jhen): Do something with threads to allow multiple streams. -    return this; -  } - -  Error destroyStream(const void *Handle) override { return Error::success(); } - -  Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize, -               GridDimensions GridSize, const void *PKernelHandle, -               const PackedKernelArgumentArrayBase &ArgumentArray) override { -    // TODO(jhen): Can we do something with BlockSize and GridSize? 
-    if (!(BlockSize.X == 1 && BlockSize.Y == 1 && BlockSize.Z == 1)) { -      return make_error( -          "Block dimensions were (" + llvm::Twine(BlockSize.X) + "," + -          llvm::Twine(BlockSize.Y) + "," + llvm::Twine(BlockSize.Z) + -          "), but only size (1,1,1) is permitted for this platform"); -    } -    if (!(GridSize.X == 1 && GridSize.Y == 1 && GridSize.Z == 1)) { -      return make_error( -          "Grid dimensions were (" + llvm::Twine(GridSize.X) + "," + -          llvm::Twine(GridSize.Y) + "," + llvm::Twine(GridSize.Z) + -          "), but only size (1,1,1) is permitted for this platform"); -    } - -    (*static_cast<const std::function<void(const void *const *)> *>( -        PKernelHandle))(ArgumentArray.getAddresses()); -    return Error::success(); -  } - -  Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle, -                size_t SrcByteOffset, void *HostDst, size_t DstByteOffset, -                size_t ByteCount) override { -    std::memcpy(offset(HostDst, DstByteOffset), -                offset(DeviceSrcHandle, SrcByteOffset), ByteCount); -    return Error::success(); -  } - -  Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc, -                size_t SrcByteOffset, const void *DeviceDstHandle, -                size_t DstByteOffset, size_t ByteCount) override { -    std::memcpy(offset(DeviceDstHandle, DstByteOffset), -                offset(HostSrc, SrcByteOffset), ByteCount); -    return Error::success(); -  } - -  Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle, -                size_t SrcByteOffset, const void *DeviceDstHandle, -                size_t DstByteOffset, size_t ByteCount) override { -    std::memcpy(offset(DeviceDstHandle, DstByteOffset), -                offset(DeviceSrcHandle, SrcByteOffset), ByteCount); -    return Error::success(); -  } - -  Error blockHostUntilDone(const void *PlatformStreamHandle) override { -    // All host operations are 
synchronous anyway. -    return Error::success(); -  } - -  Expected<void *> allocateDeviceMemory(size_t ByteCount) override { -    return std::malloc(ByteCount); -  } - -  Error freeDeviceMemory(const void *Handle) override { -    std::free(const_cast<void *>(Handle)); -    return Error::success(); -  } - -  Error registerHostMemory(void *Memory, size_t ByteCount) override { -    return Error::success(); -  } - -  Error unregisterHostMemory(const void *Memory) override { -    return Error::success(); -  } - -  Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset, -                           void *HostDst, size_t DstByteOffset, -                           size_t ByteCount) override { -    std::memcpy(offset(HostDst, DstByteOffset), -                offset(DeviceSrcHandle, SrcByteOffset), ByteCount); -    return Error::success(); -  } - -  Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset, -                           const void *DeviceDstHandle, size_t DstByteOffset, -                           size_t ByteCount) override { -    std::memcpy(offset(DeviceDstHandle, DstByteOffset), -                offset(HostSrc, SrcByteOffset), ByteCount); -    return Error::success(); -  } - -  Error synchronousCopyD2D(const void *DeviceSrcHandle, size_t SrcByteOffset, -                           const void *DeviceDstHandle, size_t DstByteOffset, -                           size_t ByteCount) override { -    std::memcpy(offset(DeviceDstHandle, DstByteOffset), -                offset(DeviceSrcHandle, SrcByteOffset), ByteCount); -    return Error::success(); -  } - -  /// Gets the value at the given index from a GlobalDeviceMemory<T> instance -  /// created by this class. 
-  template <typename T> -  static T getDeviceValue(const streamexecutor::GlobalDeviceMemory<T> &Memory, -                          size_t Index) { -    return static_cast<const T *>(Memory.getHandle())[Index]; -  } - -private: -  static void *offset(const void *Base, size_t Offset) { -    return const_cast<char *>(static_cast<const char *>(Base) + Offset); -  } -}; - -} // namespace host -} // namespace streamexecutor - -#endif // STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt deleted file mode 100644 index 6157654a97a..00000000000 --- a/parallel-libs/streamexecutor/lib/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -macro(add_se_library name) -  add_llvm_library(${name} ${ARGN}) -  set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries") -endmacro(add_se_library) - -add_subdirectory(platforms) - -add_se_library( -    streamexecutor -    Device.cpp -    DeviceMemory.cpp -    Error.cpp -    HostMemory.cpp -    Kernel.cpp -    KernelSpec.cpp -    PackedKernelArgumentArray.cpp -    Platform.cpp -    PlatformDevice.cpp -    PlatformManager.cpp -    Stream.cpp -    ${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT} -    LINK_LIBS -    ${STREAM_EXECUTOR_LIBCUDA_LIBRARIES}) - -install(TARGETS streamexecutor DESTINATION lib) diff --git a/parallel-libs/streamexecutor/lib/Device.cpp b/parallel-libs/streamexecutor/lib/Device.cpp deleted file mode 100644 index 2bed3e7be16..00000000000 --- a/parallel-libs/streamexecutor/lib/Device.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===-- Device.cpp - Device implementation --------------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of Device class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Device.h" - -#include <cassert> - -#include "streamexecutor/PlatformDevice.h" -#include "streamexecutor/Stream.h" - -#include "llvm/ADT/STLExtras.h" - -namespace streamexecutor { - -Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {} - -Device::~Device() = default; - -Expected<Stream> Device::createStream() { -  Expected<const void *> MaybePlatformStream = PDevice->createStream(); -  if (!MaybePlatformStream) -    return MaybePlatformStream.takeError(); -  return Stream(PDevice, *MaybePlatformStream); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp b/parallel-libs/streamexecutor/lib/DeviceMemory.cpp deleted file mode 100644 index 8447a60b1ca..00000000000 --- a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//===-- DeviceMemory.cpp - DeviceMemory implementation --------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of DeviceMemory class internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/DeviceMemory.h" - -#include "streamexecutor/Device.h" - -namespace streamexecutor { - -GlobalDeviceMemoryBase::~GlobalDeviceMemoryBase() { -  if (Handle) -    // TODO(jhen): How to handle errors here. 
-    consumeError(TheDevice->freeDeviceMemory(*this)); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Error.cpp b/parallel-libs/streamexecutor/lib/Error.cpp deleted file mode 100644 index 0d728fab669..00000000000 --- a/parallel-libs/streamexecutor/lib/Error.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===-- Error.cpp - Error handling ----------------------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Types for returning recoverable errors. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Error.h" - -#include "llvm/ADT/StringRef.h" - -namespace { - -// An error with a string message describing the cause. -class StreamExecutorError : public llvm::ErrorInfo<StreamExecutorError> { -public: -  StreamExecutorError(llvm::StringRef Message) : Message(Message.str()) {} - -  void log(llvm::raw_ostream &OS) const override { OS << Message; } - -  std::error_code convertToErrorCode() const override { -    llvm_unreachable( -        "StreamExecutorError does not support conversion to std::error_code"); -  } - -  std::string getErrorMessage() const { return Message; } - -  static char ID; - -private: -  std::string Message; -}; - -char StreamExecutorError::ID = 0; - -} // namespace - -namespace streamexecutor { - -Error make_error(const Twine &Message) { -  return llvm::make_error<StreamExecutorError>(Message.str()); -} - -std::string consumeAndGetMessage(Error &&E) { -  if (!E) -    return "success"; -  std::string Message; -  llvm::handleAllErrors(std::move(E), -                        [&Message](const StreamExecutorError &SEE) { -                          Message = SEE.getErrorMessage(); -                        }); -  
return Message; -} - -void dieIfError(Error &&E) { -  if (E) { -    std::fprintf(stderr, "Error encountered: %s.\n", -                 streamexecutor::consumeAndGetMessage(std::move(E)).c_str()); -    std::exit(EXIT_FAILURE); -  } -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/HostMemory.cpp b/parallel-libs/streamexecutor/lib/HostMemory.cpp deleted file mode 100644 index 8eba7e6b563..00000000000 --- a/parallel-libs/streamexecutor/lib/HostMemory.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===-- HostMemory.cpp - HostMemory implementation ------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of HostMemory internals. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/HostMemory.h" -#include "streamexecutor/Device.h" - -namespace streamexecutor { -namespace internal { - -void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer) { -  // TODO(jhen): How to handle errors here? -  if (Pointer) -    consumeError(TheDevice->unregisterHostMemory(Pointer)); -} - -} // namespace internal -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp deleted file mode 100644 index 911ac6656aa..00000000000 --- a/parallel-libs/streamexecutor/lib/Kernel.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//===-- Kernel.cpp - General kernel implementation ------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation details for kernel types. -/// -//===----------------------------------------------------------------------===// - -#include <cassert> - -#include "streamexecutor/Device.h" -#include "streamexecutor/Kernel.h" -#include "streamexecutor/PlatformDevice.h" - -#include "llvm/DebugInfo/Symbolize/Symbolize.h" - -namespace streamexecutor { - -KernelBase::KernelBase(PlatformDevice *D, const void *PlatformKernelHandle, -                       llvm::StringRef Name) -    : PDevice(D), PlatformKernelHandle(PlatformKernelHandle), Name(Name), -      DemangledName( -          llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr)) { -  assert(D != nullptr && -         "cannot construct a kernel object with a null platform device"); -  assert(PlatformKernelHandle != nullptr && -         "cannot construct a kernel object with a null platform kernel handle"); -} - -KernelBase::KernelBase(KernelBase &&Other) noexcept -    : PDevice(Other.PDevice), PlatformKernelHandle(Other.PlatformKernelHandle), -      Name(std::move(Other.Name)), -      DemangledName(std::move(Other.DemangledName)) { -  Other.PDevice = nullptr; -  Other.PlatformKernelHandle = nullptr; -} - -KernelBase &KernelBase::operator=(KernelBase &&Other) noexcept { -  PDevice = Other.PDevice; -  PlatformKernelHandle = Other.PlatformKernelHandle; -  Name = std::move(Other.Name); -  DemangledName = std::move(Other.DemangledName); -  Other.PDevice = nullptr; -  Other.PlatformKernelHandle = nullptr; -  return *this; -} - -KernelBase::~KernelBase() { -  if (PlatformKernelHandle) -    // TODO(jhen): Handle the error here. 
-    consumeError(PDevice->destroyKernel(PlatformKernelHandle)); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/KernelSpec.cpp b/parallel-libs/streamexecutor/lib/KernelSpec.cpp deleted file mode 100644 index 951ea8fc41c..00000000000 --- a/parallel-libs/streamexecutor/lib/KernelSpec.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- KernelSpec.cpp - General kernel spec implementation ---------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation details for kernel loader specs. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/KernelSpec.h" - -#include "llvm/ADT/STLExtras.h" - -namespace streamexecutor { - -KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName) -    : KernelName(KernelName) {} - -CUDAPTXInMemorySpec::CUDAPTXInMemorySpec( -    llvm::StringRef KernelName, -    const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) -    : KernelLoaderSpec(KernelName) { -  for (const auto &Spec : SpecList) -    PTXByComputeCapability.emplace(Spec.TheComputeCapability, Spec.PTXCode); -} - -const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor, -                                         int ComputeCapabilityMinor) const { -  auto Iterator = -      PTXByComputeCapability.upper_bound(CUDAPTXInMemorySpec::ComputeCapability{ -          ComputeCapabilityMajor, ComputeCapabilityMinor}); -  if (Iterator == PTXByComputeCapability.begin()) -    return nullptr; -  --Iterator; -  return Iterator->second; -} - -CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName, -                                               const void *Bytes) -    : KernelLoaderSpec(KernelName), 
Bytes(Bytes) {} - -OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName, -                                               const char *Text) -    : KernelLoaderSpec(KernelName), Text(Text) {} - -void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) { -  if (TheKernelName) -    assert(KernelName.equals(*TheKernelName) && -           "different kernel names in one MultiKernelLoaderSpec"); -  else -    TheKernelName = llvm::make_unique<std::string>(KernelName); -} - -MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory( -    llvm::StringRef KernelName, -    llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) { -  assert((TheCUDAPTXInMemorySpec == nullptr) && -         "illegal loader spec overwrite"); -  setKernelName(KernelName); -  TheCUDAPTXInMemorySpec = -      llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList); -  return *this; -} - -MultiKernelLoaderSpec & -MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName, -                                             const void *Bytes) { -  assert((TheCUDAFatbinInMemorySpec == nullptr) && -         "illegal loader spec overwrite"); -  setKernelName(KernelName); -  TheCUDAFatbinInMemorySpec = -      llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes); -  return *this; -} - -MultiKernelLoaderSpec & -MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName, -                                             const char *OpenCLText) { -  assert((TheOpenCLTextInMemorySpec == nullptr) && -         "illegal loader spec overwrite"); -  setKernelName(KernelName); -  TheOpenCLTextInMemorySpec = -      llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText); -  return *this; -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp b/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp deleted file mode 100644 index 04ac80d74ed..00000000000 --- 
a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation details for classes from PackedKernelArgumentArray.h. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/PackedKernelArgumentArray.h" - -namespace streamexecutor { - -PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default; - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Platform.cpp b/parallel-libs/streamexecutor/lib/Platform.cpp deleted file mode 100644 index 4250468a022..00000000000 --- a/parallel-libs/streamexecutor/lib/Platform.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- Platform.cpp - Platform implementation ----------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of Platform class internals. 
-/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Platform.h" - -namespace streamexecutor { - -Platform::~Platform() = default; - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp b/parallel-libs/streamexecutor/lib/PlatformDevice.cpp deleted file mode 100644 index 8dd44a3a4aa..00000000000 --- a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- PlatformDevice.cpp - Platform interface implementations -----------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation file for PlatformDevice.h. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/PlatformDevice.h" - -namespace streamexecutor { - -PlatformDevice::~PlatformDevice() = default; - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/PlatformManager.cpp b/parallel-libs/streamexecutor/lib/PlatformManager.cpp deleted file mode 100644 index 8f44befbd71..00000000000 --- a/parallel-libs/streamexecutor/lib/PlatformManager.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===-- PlatformManager.cpp - PlatformManager implementation --------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of PlatformManager class internals. 
-/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/PlatformManager.h" - -#include "streamexecutor/PlatformOptions.h" -#include "streamexecutor/platforms/host/HostPlatform.h" - -#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM -#include "streamexecutor/platforms/cuda/CUDAPlatform.h" -#endif - -namespace streamexecutor { - -PlatformManager::PlatformManager() { -  // TODO(jhen): Register known platforms by name. -  // We have a couple of options here: -  //  * Use build-system flags to set preprocessor macros that select the -  //    appropriate code to include here. -  //  * Use static initialization tricks to have platform libraries register -  //    themselves when they are loaded. - -  PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>()); - -#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM -  PlatformsByName.emplace("cuda", llvm::make_unique<cuda::CUDAPlatform>()); -#endif -} - -Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) { -  static PlatformManager Instance; -  auto Iterator = Instance.PlatformsByName.find(Name.lower()); -  if (Iterator != Instance.PlatformsByName.end()) -    return Iterator->second.get(); -  return make_error("no available platform with name " + Name); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp deleted file mode 100644 index fe135b4d0af..00000000000 --- a/parallel-libs/streamexecutor/lib/Stream.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===-- Stream.cpp - General stream implementation ------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation details for a general stream object. -/// -//===----------------------------------------------------------------------===// - -#include <cassert> - -#include "streamexecutor/Stream.h" - -namespace streamexecutor { - -Stream::Stream(PlatformDevice *D, const void *PlatformStreamHandle) -    : PDevice(D), PlatformStreamHandle(PlatformStreamHandle), -      ErrorMessageMutex(llvm::make_unique<llvm::sys::RWMutex>()) { -  assert(D != nullptr && -         "cannot construct a stream object with a null platform device"); -  assert(PlatformStreamHandle != nullptr && -         "cannot construct a stream object with a null platform stream handle"); -} - -Stream::Stream(Stream &&Other) noexcept -    : PDevice(Other.PDevice), PlatformStreamHandle(Other.PlatformStreamHandle), -      ErrorMessageMutex(std::move(Other.ErrorMessageMutex)), -      ErrorMessage(std::move(Other.ErrorMessage)) { -  Other.PDevice = nullptr; -  Other.PlatformStreamHandle = nullptr; -} - -Stream &Stream::operator=(Stream &&Other) noexcept { -  PDevice = Other.PDevice; -  PlatformStreamHandle = Other.PlatformStreamHandle; -  ErrorMessageMutex = std::move(Other.ErrorMessageMutex); -  ErrorMessage = std::move(Other.ErrorMessage); -  Other.PDevice = nullptr; -  Other.PlatformStreamHandle = nullptr; -  return *this; -} - -Stream::~Stream() { -  if (PlatformStreamHandle) -    // TODO(jhen): Handle error condition here. 
-    consumeError(PDevice->destroyStream(PlatformStreamHandle)); -} - -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt deleted file mode 100644 index 0802c059add..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) -    add_subdirectory(cuda) -endif() diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt deleted file mode 100644 index 5be76d1c75a..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_library( -    streamexecutor_cuda_platform -    OBJECT -    CUDAPlatform.cpp -    CUDAPlatformDevice.cpp) diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp deleted file mode 100644 index 9f9e4388647..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp +++ /dev/null @@ -1,65 +0,0 @@ -//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDA platform internals. 
-/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatform.h" -#include "streamexecutor/Device.h" -#include "streamexecutor/Platform.h" -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" - -#include "llvm/Support/Mutex.h" - -#include "cuda.h" - -#include <map> - -namespace streamexecutor { -namespace cuda { - -static CUresult ensureCUDAInitialized() { -  static CUresult InitResult = []() { return cuInit(0); }(); -  return InitResult; -} - -size_t CUDAPlatform::getDeviceCount() const { -  if (ensureCUDAInitialized()) -    // TODO(jhen): Log an error. -    return 0; - -  int DeviceCount = 0; -  CUresult Result = cuDeviceGetCount(&DeviceCount); -  (void)Result; -  // TODO(jhen): Log an error. - -  return DeviceCount; -} - -Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) { -  if (CUresult InitResult = ensureCUDAInitialized()) -    return CUresultToError(InitResult, "cached cuInit return value"); - -  llvm::sys::ScopedLock Lock(Mutex); -  auto Iterator = PlatformDevices.find(DeviceIndex); -  if (Iterator == PlatformDevices.end()) { -    if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) { -      Iterator = -          PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first; -    } else { -      return MaybePDevice.takeError(); -    } -  } -  return Device(&Iterator->second); -} - -} // namespace cuda -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp deleted file mode 100644 index 5284a9a0a35..00000000000 --- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the 
University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implementation of CUDAPlatformDevice. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h" -#include "streamexecutor/PlatformDevice.h" - -#include "cuda.h" - -namespace streamexecutor { -namespace cuda { - -static void *offset(const void *Base, size_t Offset) { -  return const_cast<char *>(static_cast<const char *>(Base) + Offset); -} - -Error CUresultToError(int CUResult, const llvm::Twine &Message) { -  CUresult Result = static_cast<CUresult>(CUResult); -  if (Result) { -    const char *ErrorName; -    if (cuGetErrorName(Result, &ErrorName)) -      ErrorName = "UNKNOWN ERROR NAME"; -    const char *ErrorString; -    if (cuGetErrorString(Result, &ErrorString)) -      ErrorString = "UNKNOWN ERROR DESCRIPTION"; -    return make_error("CUDA driver error: '" + Message + "', error code = " + -                      llvm::Twine(static_cast<int>(Result)) + ", name = " + -                      ErrorName + ", description = '" + ErrorString + "'"); -  } else -    return Error::success(); -} - -std::string CUDAPlatformDevice::getName() const { -  static std::string CachedName = [](int DeviceIndex) { -    static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024; -    std::string Name = "CUDA device " + std::to_string(DeviceIndex); -    char NameFromDriver[MAX_DRIVER_NAME_BYTES]; -    if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1, -                         DeviceIndex)) { -      NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0'; -      Name.append(": ").append(NameFromDriver); -    } -    return Name; -  }(DeviceIndex); -  return CachedName; -} - -Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) { -  CUdevice DeviceHandle; -  if (CUresult Result = 
cuDeviceGet(&DeviceHandle, DeviceIndex)) -    return CUresultToError(Result, "cuDeviceGet"); - -  CUcontext ContextHandle; -  if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle)) -    return CUresultToError(Result, "cuDevicePrimaryCtxRetain"); - -  if (CUresult Result = cuCtxSetCurrent(ContextHandle)) -    return CUresultToError(Result, "cuCtxSetCurrent"); - -  return CUDAPlatformDevice(DeviceIndex); -} - -CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept -    : DeviceIndex(Other.DeviceIndex) { -  Other.DeviceIndex = -1; -} - -CUDAPlatformDevice &CUDAPlatformDevice:: -operator=(CUDAPlatformDevice &&Other) noexcept { -  DeviceIndex = Other.DeviceIndex; -  Other.DeviceIndex = -1; -  return *this; -} - -CUDAPlatformDevice::~CUDAPlatformDevice() { -  CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex); -  (void)Result; -  // TODO(jhen): Log error. -} - -Expected<const void *> -CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) { -  // TODO(jhen): Maybe first check loaded modules? 
-  if (!Spec.hasCUDAPTXInMemory()) -    return make_error("no CUDA code available to create kernel"); - -  CUdevice Device = static_cast<int>(DeviceIndex); -  int ComputeCapabilityMajor = 0; -  int ComputeCapabilityMinor = 0; -  if (CUresult Result = cuDeviceGetAttribute( -          &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, -          Device)) -    return CUresultToError( -        Result, -        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR"); -  if (CUresult Result = cuDeviceGetAttribute( -          &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, -          Device)) -    return CUresultToError( -        Result, -        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR"); -  const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor, -                                                       ComputeCapabilityMinor); - -  if (!Code) -    return make_error("no suitable CUDA source found for compute capability " + -                      llvm::Twine(ComputeCapabilityMajor) + "." + -                      llvm::Twine(ComputeCapabilityMinor)); - -  CUmodule Module; -  if (CUresult Result = cuModuleLoadData(&Module, Code)) -    return CUresultToError(Result, "cuModuleLoadData"); - -  CUfunction Function; -  if (CUresult Result = -          cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str())) -    return CUresultToError(Result, "cuModuleGetFunction"); - -  // TODO(jhen): Should I save this function pointer in case someone asks for -  // it again? - -  // TODO(jhen): Should I save the module pointer so I can unload it when I -  // destroy this device? - -  return static_cast<const void *>(Function); -} - -Error CUDAPlatformDevice::destroyKernel(const void *Handle) { -  // TODO(jhen): Maybe keep track of kernels for each module and unload the -  // module after they are all destroyed. 
-  return Error::success(); -} - -Expected<const void *> CUDAPlatformDevice::createStream() { -  CUstream Stream; -  if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT)) -    return CUresultToError(Result, "cuStreamCreate"); -  return Stream; -} - -Error CUDAPlatformDevice::destroyStream(const void *Handle) { -  return CUresultToError( -      cuStreamDestroy(static_cast<CUstream>(const_cast<void *>(Handle))), -      "cuStreamDestroy"); -} - -Error CUDAPlatformDevice::launch( -    const void *PlatformStreamHandle, BlockDimensions BlockSize, -    GridDimensions GridSize, const void *PKernelHandle, -    const PackedKernelArgumentArrayBase &ArgumentArray) { -  CUfunction Function = -      reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle)); -  CUstream Stream = -      reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle)); - -  auto Launch = [Function, Stream, BlockSize, -                 GridSize](size_t SharedMemoryBytes, void **ArgumentAddresses) { -    return CUresultToError( -        cuLaunchKernel(Function,                              // -                       GridSize.X, GridSize.Y, GridSize.Z,    // -                       BlockSize.X, BlockSize.Y, BlockSize.Z, // -                       SharedMemoryBytes, Stream, ArgumentAddresses, nullptr), -        "cuLaunchKernel"); -  }; - -  void **ArgumentAddresses = const_cast<void **>(ArgumentArray.getAddresses()); -  size_t SharedArgumentCount = ArgumentArray.getSharedCount(); -  if (SharedArgumentCount) { -    // The argument handling in this case is not very efficient. We may need to -    // come back and optimize it later. -    // -    // Perhaps introduce another branch for the case where there is exactly one -    // shared memory argument and it is the first one. 
This is the only case -    // that will be used for compiler-generated CUDA kernels, and OpenCL users -    // can choose to take advantage of it by combining their dynamic shared -    // memory arguments and putting them first in the kernel signature. -    unsigned SharedMemoryBytes = 0; -    size_t ArgumentCount = ArgumentArray.getArgumentCount(); -    llvm::SmallVector<void *, 16> NonSharedArgumentAddresses( -        ArgumentCount - SharedArgumentCount); -    size_t NonSharedIndex = 0; -    for (size_t I = 0; I < ArgumentCount; ++I) -      if (ArgumentArray.getType(I) == KernelArgumentType::SHARED_DEVICE_MEMORY) -        SharedMemoryBytes += ArgumentArray.getSize(I); -      else -        NonSharedArgumentAddresses[NonSharedIndex++] = ArgumentAddresses[I]; -    return Launch(SharedMemoryBytes, NonSharedArgumentAddresses.data()); -  } -  return Launch(0, ArgumentAddresses); -} - -Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle, -                                  const void *DeviceSrcHandle, -                                  size_t SrcByteOffset, void *HostDst, -                                  size_t DstByteOffset, size_t ByteCount) { -  return CUresultToError( -      cuMemcpyDtoHAsync( -          offset(HostDst, DstByteOffset), -          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), -          ByteCount, -          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), -      "cuMemcpyDtoHAsync"); -} - -Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle, -                                  const void *HostSrc, size_t SrcByteOffset, -                                  const void *DeviceDstHandle, -                                  size_t DstByteOffset, size_t ByteCount) { -  return CUresultToError( -      cuMemcpyHtoDAsync( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          offset(HostSrc, SrcByteOffset), ByteCount, -          
static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), -      "cuMemcpyHtoDAsync"); -} - -Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle, -                                  const void *DeviceSrcHandle, -                                  size_t SrcByteOffset, -                                  const void *DeviceDstHandle, -                                  size_t DstByteOffset, size_t ByteCount) { -  return CUresultToError( -      cuMemcpyDtoDAsync( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), -          ByteCount, -          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))), -      "cuMemcpyDtoDAsync"); -} - -Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) { -  return CUresultToError(cuStreamSynchronize(static_cast<CUstream>( -                             const_cast<void *>(PlatformStreamHandle))), -                         "cuStreamSynchronize"); -} - -Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) { -  CUdeviceptr Pointer; -  if (CUresult Result = cuMemAlloc(&Pointer, ByteCount)) -    return CUresultToError(Result, "cuMemAlloc"); -  return reinterpret_cast<void *>(Pointer); -} - -Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) { -  return CUresultToError(cuMemFree(reinterpret_cast<CUdeviceptr>(Handle)), -                         "cuMemFree"); -} - -Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) { -  return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u), -                         "cuMemHostRegister"); -} - -Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) { -  return CUresultToError(cuMemHostUnregister(const_cast<void *>(Memory)), -                         "cuMemHostUnregister"); -} - -Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle, -             
                                size_t SrcByteOffset, -                                             void *HostDst, -                                             size_t DstByteOffset, -                                             size_t ByteCount) { -  return CUresultToError(cuMemcpyDtoH(offset(HostDst, DstByteOffset), -                                      reinterpret_cast<CUdeviceptr>(offset( -                                          DeviceSrcHandle, SrcByteOffset)), -                                      ByteCount), -                         "cuMemcpyDtoH"); -} - -Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc, -                                             size_t SrcByteOffset, -                                             const void *DeviceDstHandle, -                                             size_t DstByteOffset, -                                             size_t ByteCount) { -  return CUresultToError( -      cuMemcpyHtoD( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          offset(HostSrc, SrcByteOffset), ByteCount), -      "cuMemcpyHtoD"); -} - -Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceDstHandle, -                                             size_t DstByteOffset, -                                             const void *DeviceSrcHandle, -                                             size_t SrcByteOffset, -                                             size_t ByteCount) { -  return CUresultToError( -      cuMemcpyDtoD( -          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)), -          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)), -          ByteCount), -      "cuMemcpyDtoD"); -} - -} // namespace cuda -} // namespace streamexecutor diff --git a/parallel-libs/streamexecutor/tools/streamexecutor-config/CMakeLists.txt b/parallel-libs/streamexecutor/tools/streamexecutor-config/CMakeLists.txt deleted file mode 100644 index 
7c0e5b001ea..00000000000 --- a/parallel-libs/streamexecutor/tools/streamexecutor-config/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -find_package(PythonInterp REQUIRED) -configure_file(streamexecutor-config.in streamexecutor-config) -install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/streamexecutor-config DESTINATION bin) diff --git a/parallel-libs/streamexecutor/tools/streamexecutor-config/streamexecutor-config.in b/parallel-libs/streamexecutor/tools/streamexecutor-config/streamexecutor-config.in deleted file mode 100755 index f3e15305d27..00000000000 --- a/parallel-libs/streamexecutor/tools/streamexecutor-config/streamexecutor-config.in +++ /dev/null @@ -1,231 +0,0 @@ -#!@PYTHON_EXECUTABLE@ -# -#===- streamexecutor-config - Build config script for SE -----*- python -*--===# -# -#                     The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===------------------------------------------------------------------------===# - -r""" -Get configuration info needed to compile programs which use StreamExecutor. - -Runs llvm-config and adds StreamExecutor-specific flags to the output. Supports -only the subset of llvm-config flags that are relevant for applications -compiling against StreamExecutor. - -This utility will typically be used to construct a compile command line for an -application which depends on the StreamExecutor library. - -For example: -  c++ example.cpp -o example \ -      $(streamexecutor-config \ -          --cppflags --cxxflags --ldflags --libs --system-libs) -""" - -import argparse -import errno -import os -import shlex -import subprocess -import sys - -# The following functions are configured by cmake. They use raw triple-quoted -# strings to surround values that are substituted by cmake at configure time. -# This kind of quoting should allow for paths that contain spaces. 
- -def get_llvm_config_dir(): -  """Gets the path to the llvm-config executable.""" -  return r"""@LLVM_BINARY_DIR@/bin""" - -def get_cmake_install_prefix(): -  """Gets the value of the cmake variable CMAKE_INSTALL_PREFIX.""" -  return r"""@CMAKE_INSTALL_PREFIX@""" - -def convert_library_name(library_name): -  """Converts a library name ending in '.framework' into a '-framework' flag. - -  This is used to support OS X. - -  >>> convert_library_name('') -  '' - -  >>> convert_library_name('/usr/local/lib64/libcuda.so') -  '/usr/local/lib64/libcuda.so' - -  >>> convert_library_name('/Library/Frameworks/cuda.framework') -  '-framework cuda' -  """ -  framework_suffix = '.framework' -  if library_name.endswith(framework_suffix): -    framework_name = os.path.basename(library_name)[:-len(framework_suffix)] -    library_name = '-framework ' + framework_name -  return library_name - -def get_cuda_driver_library(): -  """Gets the value of the cmake variable CUDA_DRIVER_LIBRARY.""" -  return convert_library_name(r"""@CUDA_DRIVER_LIBRARY@""") - -def cuddle_flag(flag, tokens): -  """If flag appears by itself in tokens, combines it with the next token. 
- -  >>> tokens = ['-I', '/usr/include'] -  >>> cuddle_flag('-I', tokens) -  >>> tokens -  ['-I/usr/include'] - -  >>> tokens = ['-L', '/usr/lib'] -  >>> cuddle_flag('-L', tokens) -  >>> tokens -  ['-L/usr/lib'] - -  >>> tokens = ['-I'] -  >>> cuddle_flag('-I', tokens) -  >>> tokens -  ['-I'] - -  >>> tokens = ['-I', '/usr/include', '-I', '/usr/local/include'] -  >>> cuddle_flag('-I', tokens) -  >>> tokens -  ['-I/usr/include', '-I/usr/local/include'] -  """ -  start = 0 -  while True: -    try: -      index = tokens.index(flag, start) -    except ValueError: -      return -    if index + 1 < len(tokens): -      follower = tokens.pop(index + 1) -      tokens[index] = flag + follower -    start = index + 1 - -def get_llvm_config_output_for_dir(llvm_config_dir, flags_string): -  """Calls llvm-config at the given path and returns the output with -I and -L -  flags cuddled.""" -  output = subprocess.check_output( -      ['%s/llvm-config' % llvm_config_dir] + flags_string.split()).strip() -  tokens = shlex.split(output) -  cuddle_flag('-I', tokens) -  cuddle_flag('-L', tokens) -  return ' '.join(tokens) - -def has_token(token, string): -  """Checks if the given token appears in the string. - -  The token argument must be a single shell token. 
- -  >>> string = '-I/usr/include -L"/usr/lib"' -  >>> has_token('-I/usr/include', string) -  True -  >>> has_token('-I/usr/local/include', string) -  False -  >>> has_token('-I"/usr/include"', string) -  True -  >>> has_token('-L"/usr/lib"', string) -  True -  >>> has_token('-L/usr/lib', string) -  True -  """ -  split_token = shlex.split(token) -  if len(split_token) > 1: -    raise ValueError('has_token called with a multi-token token: ' + token) -  escaped_token = split_token[0] -  return escaped_token in shlex.split(string) - -def main(): -  parser = argparse.ArgumentParser( -      prog='streamexecutor-config', -      formatter_class=argparse.RawDescriptionHelpFormatter, -      description=__doc__) - -  parser.add_argument( -      '--cppflags', -      action='store_true', -      help= -        'C preprocessor flags for files that include StreamExecutor headers.') - -  parser.add_argument( -      '--cxxflags', -      action='store_true', -      help='C++ compiler flags for files that include StreamExecutor headers.') - -  parser.add_argument( -      '--ldflags', -      action='store_true', -      help='Print linker flags.') - -  parser.add_argument( -      '--libs', -      action='store_true', -      help='Libraries needed to link against StreamExecutor.') - -  parser.add_argument( -      '--system-libs', -      action='store_true', -      help='System libraries needed to link against StreamExecutor.') - -  parser.add_argument( -      '--llvm-config-dir', -      default=get_llvm_config_dir(), -      help='Directory containing the llvm-config executable. '\ -          'If not specified, defaults to the cmake-configured location') - -  args = parser.parse_args() - -  # Print the help message if the user did not pass any flag arguments. -  if not any( -      getattr(args, flag) -        for flag in ('cppflags', 'cxxflags', 'ldflags', 'libs', 'system_libs')): -    parser.print_help() -    sys.exit(1) - -  # Check for the presence of the llvm-config executable. 
-  if not os.path.isfile('%s/llvm-config' % args.llvm_config_dir): -    sys.exit('llvm-config not found in: ' + args.llvm_config_dir) -  if not os.access('%s/llvm-config' % args.llvm_config_dir, os.X_OK): -    sys.exit('llvm-config not executable in: ' + args.llvm_config_dir) - -  # We will always use args.llvm_config_dir as the second argument to -  # get_llvm_config_output_for_path. -  get_llvm_config_output = lambda flags : get_llvm_config_output_for_dir( -      args.llvm_config_dir, flags) - -  all_flags = [] - -  if args.cppflags: -    llvm_flags = get_llvm_config_output('--cppflags') -    all_flags.append(llvm_flags) -    se_flag = "-I%s/include" % get_cmake_install_prefix() -    if not has_token(token=se_flag, string=llvm_flags): -      all_flags.append(se_flag) - -  if args.cxxflags: -    all_flags.append(get_llvm_config_output('--cxxflags')) - -  if args.ldflags: -    llvm_flags = get_llvm_config_output('--ldflags') -    all_flags.append(llvm_flags) -    se_flag = "-L%s/lib" % get_cmake_install_prefix() -    if not has_token(token=se_flag, string=llvm_flags): -      all_flags.append(se_flag) - -  if args.libs: -    llvm_flags = get_llvm_config_output('--libs support symbolize') -    se_flag = '-lstreamexecutor' -    if not has_token(token=se_flag, string=llvm_flags): -      all_flags.append(se_flag) -    cuda_driver_library = get_cuda_driver_library() -    if cuda_driver_library: -      all_flags.append(cuda_driver_library) -    all_flags.append(llvm_flags) - -  if args.system_libs: -    all_flags.append(get_llvm_config_output('--system-libs')) - -  print(' '.join(all_flags)) - -if __name__ == '__main__': -  main() diff --git a/parallel-libs/streamexecutor/unittests/CMakeLists.txt b/parallel-libs/streamexecutor/unittests/CMakeLists.txt deleted file mode 100644 index 3a8142273a5..00000000000 --- a/parallel-libs/streamexecutor/unittests/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_custom_target(StreamExecutorUnitTests) 
-set_target_properties(StreamExecutorUnitTests PROPERTIES FOLDER "streamexecutor tests") - -function(add_se_unittest testdir_name) -  add_unittest(StreamExecutorUnitTests ${testdir_name} ${ARGN}) -  target_link_libraries(${testdir_name} streamexecutor) -endfunction() - -add_subdirectory(CoreTests) diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt b/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt deleted file mode 100644 index 7a70ca88245..00000000000 --- a/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM) -    set(CUDA_TEST_SOURCES cuda/CUDATest.cpp) -endif() - -add_se_unittest( -  StreamExecutorCoreTests -  DeviceTest.cpp -  KernelSpecTest.cpp -  PackedKernelArgumentArrayTest.cpp -  StreamTest.cpp -  ${CUDA_TEST_SOURCES} -) diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp deleted file mode 100644 index ab111e52a5a..00000000000 --- a/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp +++ /dev/null @@ -1,378 +0,0 @@ -//===-- DeviceTest.cpp - Tests for Device ---------------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the unit tests for Device code. 
-/// -//===----------------------------------------------------------------------===// - -#include <cstdlib> -#include <cstring> - -#include "streamexecutor/Device.h" -#include "streamexecutor/PlatformDevice.h" -#include "streamexecutor/platforms/host/HostPlatformDevice.h" - -#include "gtest/gtest.h" - -namespace { - -namespace se = ::streamexecutor; - -const auto &getDeviceValue = se::host::HostPlatformDevice::getDeviceValue<int>; - -/// Test fixture to hold objects used by tests. -class DeviceTest : public ::testing::Test { -public: -  DeviceTest() -      : Device(&PDevice), HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9}, -        HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23}, -        DeviceA5(getOrDie(Device.allocateDeviceMemory<int>(5))), -        DeviceB5(getOrDie(Device.allocateDeviceMemory<int>(5))), -        DeviceA7(getOrDie(Device.allocateDeviceMemory<int>(7))), -        DeviceB7(getOrDie(Device.allocateDeviceMemory<int>(7))), -        Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35} { -    se::dieIfError(Device.synchronousCopyH2D<int>(HostA5, DeviceA5)); -    se::dieIfError(Device.synchronousCopyH2D<int>(HostB5, DeviceB5)); -    se::dieIfError(Device.synchronousCopyH2D<int>(HostA7, DeviceA7)); -    se::dieIfError(Device.synchronousCopyH2D<int>(HostB7, DeviceB7)); -  } - -  se::host::HostPlatformDevice PDevice; -  se::Device Device; - -  // Device memory is backed by host arrays. -  int HostA5[5]; -  int HostB5[5]; -  int HostA7[7]; -  int HostB7[7]; -  se::GlobalDeviceMemory<int> DeviceA5; -  se::GlobalDeviceMemory<int> DeviceB5; -  se::GlobalDeviceMemory<int> DeviceA7; -  se::GlobalDeviceMemory<int> DeviceB7; - -  // Host memory to be used as actual host memory. 
-  int Host5[5]; -  int Host7[7]; -}; - -#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E)) -#define EXPECT_ERROR(E)                                                        \ -  do {                                                                         \ -    se::Error E__ = E;                                                         \ -    EXPECT_TRUE(static_cast<bool>(E__));                                       \ -    consumeError(std::move(E__));                                              \ -  } while (false) - -using llvm::ArrayRef; -using llvm::MutableArrayRef; - -TEST_F(DeviceTest, GetName) { EXPECT_EQ(Device.getName(), "host"); } - -TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) { -  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory = -      Device.allocateDeviceMemory<int>(10); -  EXPECT_TRUE(static_cast<bool>(MaybeMemory)); -} - -TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) { -  std::vector<int> Data(10); -  se::Expected<se::RegisteredHostMemory<int>> MaybeMemory = -      Device.registerHostMemory<int>(Data); -  EXPECT_TRUE(static_cast<bool>(MaybeMemory)); -} - -// D2H tests - -TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(HostB5[I], Host5[I]); - -  EXPECT_ERROR( -      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7)); -} - -TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5))); -  for (int I = 0; I < 5; ++I) -    
EXPECT_EQ(HostA5[I], Host5[I]); - -  EXPECT_ERROR( -      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5))); - -  EXPECT_ERROR( -      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7))); -} - -TEST_F(DeviceTest, SyncCopyD2HToPointer) { -  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7)); -} - -TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) { -  EXPECT_NO_ERROR(Device.synchronousCopyD2H( -      DeviceA5.asSlice().slice(1), MutableArrayRef<int>(Host5 + 1, 4), 4)); -  for (int I = 1; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1), -                                            MutableArrayRef<int>(Host5), 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(HostB5[I], Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(), -                                         MutableArrayRef<int>(Host5), 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), -                                         MutableArrayRef<int>(Host7), 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), -                                         MutableArrayRef<int>(Host5), 7)); -} - -TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) { -  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5), -                                            MutableArrayRef<int>(Host5))); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(HostA7[I + 1], Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1), -                                         MutableArrayRef<int>(Host5))); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), -                                         MutableArrayRef<int>(Host7))); -} - -TEST_F(DeviceTest, 
SyncCopyD2HSliceToPointer) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2H(DeviceA5.asSlice().slice(1), Host5 + 1, 4)); -  for (int I = 1; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7)); -} - -// H2D tests - -TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) { -  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7)); - -  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7)); - -  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7)); -} - -TEST_F(DeviceTest, SyncCopyH2DToArrayRef) { -  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7)); - -  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5)); -} - -TEST_F(DeviceTest, SyncCopyH2DToPointer) { -  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7)); -} - -TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) { -  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5 + 1, 4), -                                            DeviceA5.asSlice().slice(1), 4)); -  for (int I = 1; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  EXPECT_NO_ERROR(Device.synchronousCopyH2D( -      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2)); -  for (int I = 0; 
I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]); - -  EXPECT_ERROR( -      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7)); - -  EXPECT_ERROR( -      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7)); - -  EXPECT_ERROR( -      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7)); -} - -TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice())); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  EXPECT_ERROR( -      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice())); - -  EXPECT_ERROR( -      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice())); -} - -TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) { -  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7)); -} - -// D2D tests - -TEST_F(DeviceTest, SyncCopyD2DByCount) { -  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7)); -} - -TEST_F(DeviceTest, SyncCopyD2D) { -  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5)); - -  
EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7)); -} - -TEST_F(DeviceTest, SyncCopySliceD2DByCount) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice().slice(1), DeviceB5, 4)); -  for (int I = 0; I < 4; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I + 1), getDeviceValue(DeviceB5, I)); - -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7)); -} - -TEST_F(DeviceTest, SyncCopySliceD2D) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB5, I)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA7.asSlice().slice(1), DeviceB5)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7)); -} - -TEST_F(DeviceTest, SyncCopyD2DSliceByCount) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().slice(2), 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I + 2)); - -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7)); -} - -TEST_F(DeviceTest, SyncCopyD2DSlice) { -  EXPECT_NO_ERROR( -      
Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2))); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I)); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice())); - -  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice())); -} - -TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5)); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2)); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7)); -} - -TEST_F(DeviceTest, SyncCopySliceD2DSlice) { -  EXPECT_NO_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice())); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice())); - -  EXPECT_ERROR( -      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice())); -} - -} // namespace diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp deleted file mode 100644 index 486a3504091..00000000000 --- a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp +++ /dev/null @@ -1,135 +0,0 @@ -//===-- KernelSpecTest.cpp - Tests for KernelSpec -------------------------===// -// -//                     The LLVM Compiler Infrastructure 
-// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the unit tests for the code in KernelSpec. -/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/KernelSpec.h" - -#include "gtest/gtest.h" - -namespace { - -namespace se = ::streamexecutor; - -TEST(CUDAPTXInMemorySpec, NoCode) { -  se::CUDAPTXInMemorySpec Spec("KernelName", {}); -  EXPECT_EQ("KernelName", Spec.getKernelName()); -  EXPECT_EQ(nullptr, Spec.getCode(1, 0)); -} - -TEST(CUDAPTXInMemorySpec, SingleComputeCapability) { -  const char *PTXCodeString = "Dummy PTX code"; -  se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}}); -  EXPECT_EQ("KernelName", Spec.getKernelName()); -  EXPECT_EQ(nullptr, Spec.getCode(0, 5)); -  EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0)); -  EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0)); -} - -TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) { -  const char *PTXCodeString10 = "Dummy PTX code 10"; -  const char *PTXCodeString30 = "Dummy PTX code 30"; -  se::CUDAPTXInMemorySpec Spec( -      "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}}); -  EXPECT_EQ("KernelName", Spec.getKernelName()); -  EXPECT_EQ(nullptr, Spec.getCode(0, 5)); -  EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0)); -  EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0)); -  EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0)); -} - -TEST(CUDAFatbinInMemorySpec, BasicUsage) { -  const char *FatbinBytes = "Dummy fatbin bytes"; -  se::CUDAFatbinInMemorySpec Spec("KernelName", FatbinBytes); -  EXPECT_EQ("KernelName", Spec.getKernelName()); -  EXPECT_EQ(FatbinBytes, Spec.getBytes()); -} - -TEST(OpenCLTextInMemorySpec, BasicUsage) { -  const char *OpenCLText = "Dummy OpenCL text"; -  se::OpenCLTextInMemorySpec Spec("KernelName", OpenCLText); -  
EXPECT_EQ("KernelName", Spec.getKernelName()); -  EXPECT_EQ(OpenCLText, Spec.getText()); -} - -TEST(MultiKernelLoaderSpec, NoCode) { -  se::MultiKernelLoaderSpec MultiSpec; -  EXPECT_FALSE(MultiSpec.hasCUDAPTXInMemory()); -  EXPECT_FALSE(MultiSpec.hasCUDAFatbinInMemory()); -  EXPECT_FALSE(MultiSpec.hasOpenCLTextInMemory()); - -  EXPECT_DEBUG_DEATH(MultiSpec.getCUDAPTXInMemory(), -                     "getting spec that is not present"); -  EXPECT_DEBUG_DEATH(MultiSpec.getCUDAFatbinInMemory(), -                     "getting spec that is not present"); -  EXPECT_DEBUG_DEATH(MultiSpec.getOpenCLTextInMemory(), -                     "getting spec that is not present"); -} - -TEST(MultiKernelLoaderSpec, Registration) { -  se::MultiKernelLoaderSpec MultiSpec; -  const char *KernelName = "KernelName"; -  const char *PTXCodeString = "Dummy PTX code"; -  const char *FatbinBytes = "Dummy fatbin bytes"; -  const char *OpenCLText = "Dummy OpenCL text"; - -  MultiSpec.addCUDAPTXInMemory(KernelName, {{{1, 0}, PTXCodeString}}) -      .addCUDAFatbinInMemory(KernelName, FatbinBytes) -      .addOpenCLTextInMemory(KernelName, OpenCLText); - -  EXPECT_TRUE(MultiSpec.hasCUDAPTXInMemory()); -  EXPECT_TRUE(MultiSpec.hasCUDAFatbinInMemory()); -  EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory()); - -  EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName()); -  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5)); -  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0)); -  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0)); - -  EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName()); -  EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes()); - -  EXPECT_EQ(KernelName, MultiSpec.getOpenCLTextInMemory().getKernelName()); -  EXPECT_EQ(OpenCLText, MultiSpec.getOpenCLTextInMemory().getText()); -} - -TEST(MultiKernelLoaderSpec, RegisterTwice) { -  se::MultiKernelLoaderSpec MultiSpec; -  const char 
*KernelName = "KernelName"; -  const char *FatbinBytes = "Dummy fatbin bytes"; - -  MultiSpec.addCUDAFatbinInMemory(KernelName, FatbinBytes); - -  EXPECT_DEBUG_DEATH(MultiSpec.addCUDAFatbinInMemory(KernelName, FatbinBytes), -                     "illegal loader spec overwrite"); -} - -TEST(MultiKernelLoaderSpec, ConflictingKernelNames) { -  se::MultiKernelLoaderSpec MultiSpec; -  const char *KernelNameA = "KernelName"; -  std::string KernelNameB = KernelNameA; -  const char *PTXCodeString = "Dummy PTX code"; -  const char *FatbinBytes = "Dummy fatbin bytes"; - -  // Check that names don't conflict if they are equivalent strings in different -  // locations. -  MultiSpec.addCUDAPTXInMemory(KernelNameA, {{{1, 0}, PTXCodeString}}) -      .addCUDAFatbinInMemory(KernelNameB, FatbinBytes); - -  const char *OtherKernelName = "OtherKernelName"; -  const char *OpenCLText = "Dummy OpenCL text"; -  EXPECT_DEBUG_DEATH( -      MultiSpec.addOpenCLTextInMemory(OtherKernelName, OpenCLText), -      "different kernel names in one MultiKernelLoaderSpec"); -} - -} // namespace diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp deleted file mode 100644 index 860f21c323a..00000000000 --- a/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp +++ /dev/null @@ -1,150 +0,0 @@ -//===-- PackedKernelArgumentArrayTest.cpp - tests for kernel arg packing --===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Unit tests for kernel argument packing. 
-/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/Device.h" -#include "streamexecutor/DeviceMemory.h" -#include "streamexecutor/PackedKernelArgumentArray.h" -#include "streamexecutor/PlatformDevice.h" -#include "streamexecutor/platforms/host/HostPlatformDevice.h" - -#include "llvm/ADT/Twine.h" - -#include "gtest/gtest.h" - -namespace { - -namespace se = ::streamexecutor; - -using Type = se::KernelArgumentType; - -// Test fixture class for testing argument packing. -// -// Basically defines a bunch of types to be packed so they don't have to be -// defined separately in each test. -class DeviceMemoryPackingTest : public ::testing::Test { -public: -  DeviceMemoryPackingTest() -      : Device(&PDevice), Value(42), Handle(&Value), ByteCount(15), -        ElementCount(5), -        TypedGlobal(getOrDie(Device.allocateDeviceMemory<int>(ElementCount))), -        TypedShared( -            se::SharedDeviceMemory<int>::makeFromElementCount(ElementCount)) {} - -  se::host::HostPlatformDevice PDevice; -  se::Device Device; -  int Value; -  void *Handle; -  size_t ByteCount; -  size_t ElementCount; -  se::GlobalDeviceMemory<int> TypedGlobal; -  se::SharedDeviceMemory<int> TypedShared; -}; - -// Utility method to check the expected address, size, and type for a packed -// argument at the given index of a PackedKernelArgumentArray. -template <typename... 
ParameterTs> -static void -ExpectEqual(const void *ExpectedAddress, size_t ExpectedSize, Type ExpectedType, -            const se::PackedKernelArgumentArray<ParameterTs...> &Observed, -            size_t Index) { -  SCOPED_TRACE(("Index = " + llvm::Twine(Index)).str()); -  EXPECT_EQ(ExpectedAddress, Observed.getAddress(Index)); -  EXPECT_EQ(ExpectedAddress, Observed.getAddresses()[Index]); -  EXPECT_EQ(ExpectedSize, Observed.getSize(Index)); -  EXPECT_EQ(ExpectedSize, Observed.getSizes()[Index]); -  EXPECT_EQ(ExpectedType, Observed.getType(Index)); -  EXPECT_EQ(ExpectedType, Observed.getTypes()[Index]); -} - -TEST_F(DeviceMemoryPackingTest, SingleValue) { -  auto Array = se::make_kernel_argument_pack(Value); -  ExpectEqual(&Value, sizeof(Value), Type::VALUE, Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(0u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, SingleTypedGlobal) { -  auto Array = se::make_kernel_argument_pack(TypedGlobal); -  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *), -              Type::GLOBAL_DEVICE_MEMORY, Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(0u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, SingleTypedGlobalPointer) { -  auto Array = se::make_kernel_argument_pack(&TypedGlobal); -  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *), -              Type::GLOBAL_DEVICE_MEMORY, Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(0u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, SingleConstTypedGlobalPointer) { -  const se::GlobalDeviceMemory<int> *ArgumentPointer = &TypedGlobal; -  auto Array = se::make_kernel_argument_pack(ArgumentPointer); -  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *), -              Type::GLOBAL_DEVICE_MEMORY, Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(0u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, SingleTypedShared) { -  auto Array 
= se::make_kernel_argument_pack(TypedShared); -  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY, -              Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(1u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, SingleTypedSharedPointer) { -  auto Array = se::make_kernel_argument_pack(&TypedShared); -  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY, -              Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(1u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, SingleConstTypedSharedPointer) { -  const se::SharedDeviceMemory<int> *ArgumentPointer = &TypedShared; -  auto Array = se::make_kernel_argument_pack(ArgumentPointer); -  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY, -              Array, 0); -  EXPECT_EQ(1u, Array.getArgumentCount()); -  EXPECT_EQ(1u, Array.getSharedCount()); -} - -TEST_F(DeviceMemoryPackingTest, PackSeveralArguments) { -  const se::GlobalDeviceMemory<int> *TypedGlobalPointer = &TypedGlobal; -  const se::SharedDeviceMemory<int> *TypedSharedPointer = &TypedShared; -  auto Array = se::make_kernel_argument_pack(Value, TypedGlobal, &TypedGlobal, -                                             TypedGlobalPointer, TypedShared, -                                             &TypedShared, TypedSharedPointer); -  ExpectEqual(&Value, sizeof(Value), Type::VALUE, Array, 0); -  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *), -              Type::GLOBAL_DEVICE_MEMORY, Array, 1); -  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *), -              Type::GLOBAL_DEVICE_MEMORY, Array, 2); -  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *), -              Type::GLOBAL_DEVICE_MEMORY, Array, 3); -  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY, -              Array, 4); -  ExpectEqual(nullptr, TypedShared.getByteCount(), 
Type::SHARED_DEVICE_MEMORY, -              Array, 5); -  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY, -              Array, 6); -  EXPECT_EQ(7u, Array.getArgumentCount()); -  EXPECT_EQ(3u, Array.getSharedCount()); -} - -} // namespace diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp deleted file mode 100644 index c9bbcb952ba..00000000000 --- a/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp +++ /dev/null @@ -1,290 +0,0 @@ -//===-- StreamTest.cpp - Tests for Stream ---------------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the unit tests for Stream code. -/// -//===----------------------------------------------------------------------===// - -#include <cstring> - -#include "streamexecutor/Device.h" -#include "streamexecutor/Kernel.h" -#include "streamexecutor/KernelSpec.h" -#include "streamexecutor/PlatformDevice.h" -#include "streamexecutor/Stream.h" -#include "streamexecutor/platforms/host/HostPlatformDevice.h" - -#include "gtest/gtest.h" - -namespace { - -namespace se = ::streamexecutor; - -const auto &getDeviceValue = se::host::HostPlatformDevice::getDeviceValue<int>; - -/// Test fixture to hold objects used by tests. 
-class StreamTest : public ::testing::Test { -public: -  StreamTest() -      : DummyPlatformStream(1), Device(&PDevice), -        Stream(&PDevice, &DummyPlatformStream), HostA5{0, 1, 2, 3, 4}, -        HostB5{5, 6, 7, 8, 9}, HostA7{10, 11, 12, 13, 14, 15, 16}, -        HostB7{17, 18, 19, 20, 21, 22, 23}, Host5{24, 25, 26, 27, 28}, -        Host7{29, 30, 31, 32, 33, 34, 35}, -        RegisteredHost5(getOrDie( -            Device.registerHostMemory(llvm::MutableArrayRef<int>(Host5)))), -        RegisteredHost7(getOrDie( -            Device.registerHostMemory(llvm::MutableArrayRef<int>(Host7)))), -        DeviceA5(getOrDie(Device.allocateDeviceMemory<int>(5))), -        DeviceB5(getOrDie(Device.allocateDeviceMemory<int>(5))), -        DeviceA7(getOrDie(Device.allocateDeviceMemory<int>(7))), -        DeviceB7(getOrDie(Device.allocateDeviceMemory<int>(7))) { -    se::dieIfError(Device.synchronousCopyH2D<int>(HostA5, DeviceA5)); -    se::dieIfError(Device.synchronousCopyH2D<int>(HostB5, DeviceB5)); -    se::dieIfError(Device.synchronousCopyH2D<int>(HostA7, DeviceA7)); -    se::dieIfError(Device.synchronousCopyH2D<int>(HostB7, DeviceB7)); -  } - -protected: -  int DummyPlatformStream; // Mimicking a platform where the platform stream -                           // handle is just a stream number. -  se::host::HostPlatformDevice PDevice; -  se::Device Device; -  se::Stream Stream; - -  // Device memory is matched by host arrays. -  int HostA5[5]; -  int HostB5[5]; -  int HostA7[7]; -  int HostB7[7]; - -  // Host memory to be used as actual host memory. -  int Host5[5]; -  int Host7[7]; - -  se::RegisteredHostMemory<int> RegisteredHost5; -  se::RegisteredHostMemory<int> RegisteredHost7; - -  // Device memory. 
-  se::GlobalDeviceMemory<int> DeviceA5; -  se::GlobalDeviceMemory<int> DeviceB5; -  se::GlobalDeviceMemory<int> DeviceA7; -  se::GlobalDeviceMemory<int> DeviceB7; -}; - -// D2H tests - -TEST_F(StreamTest, CopyD2HToRegisteredRefByCount) { -  Stream.thenCopyD2H(DeviceA5, RegisteredHost5, 5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  Stream.thenCopyD2H(DeviceB5, RegisteredHost5, 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(HostB5[I], Host5[I]); - -  Stream.thenCopyD2H(DeviceA7, RegisteredHost5, 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyD2HToRegistered) { -  Stream.thenCopyD2H(DeviceA5, RegisteredHost5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  Stream.thenCopyD2H(DeviceA5, RegisteredHost7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyD2HSliceToRegiseredSliceByCount) { -  Stream.thenCopyD2H(DeviceA5.asSlice().slice(1), -                     RegisteredHost5.asSlice().slice(1, 4), 4); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 1; I < 5; ++I) -    EXPECT_EQ(HostA5[I], Host5[I]); - -  Stream.thenCopyD2H(DeviceB5.asSlice().drop_back(1), RegisteredHost5, 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(HostB5[I], Host5[I]); - -  Stream.thenCopyD2H(DeviceA5.asSlice(), RegisteredHost7, 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyD2HSliceToRegistered) { -  Stream.thenCopyD2H(DeviceA7.asSlice().slice(1, 5), RegisteredHost5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(HostA7[I + 1], Host5[I]); - -  Stream.thenCopyD2H(DeviceA5.asSlice(), RegisteredHost7); -  EXPECT_FALSE(Stream.isOK()); -} - -// H2D tests - -TEST_F(StreamTest, CopyH2DFromRegisterdByCount) { -  Stream.thenCopyH2D(RegisteredHost5, DeviceA5, 5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    
EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  Stream.thenCopyH2D(RegisteredHost5, DeviceB5, 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]); - -  Stream.thenCopyH2D(RegisteredHost7, DeviceA5, 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyH2DFromRegistered) { -  Stream.thenCopyH2D(RegisteredHost5, DeviceA5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  Stream.thenCopyH2D(RegisteredHost7, DeviceA5); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyH2DFromRegisteredSliceToSlice) { -  Stream.thenCopyH2D(RegisteredHost5.asSlice().slice(1, 4), -                     DeviceA5.asSlice().slice(1), 4); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 1; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  Stream.thenCopyH2D(RegisteredHost5, DeviceB5.asSlice().drop_back(1), 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]); - -  Stream.thenCopyH2D(RegisteredHost5, DeviceA5.asSlice(), 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyH2DRegisteredToSlice) { -  Stream.thenCopyH2D(RegisteredHost5, DeviceA5.asSlice()); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]); - -  Stream.thenCopyH2D(RegisteredHost7, DeviceA5.asSlice()); -  EXPECT_FALSE(Stream.isOK()); -} - -// D2D tests - -TEST_F(StreamTest, CopyD2DByCount) { -  Stream.thenCopyD2D(DeviceA5, DeviceB5, 5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  Stream.thenCopyD2D(DeviceA7, DeviceB7, 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  Stream.thenCopyD2D(DeviceA7, DeviceB5, 7); -  
EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyD2D) { -  Stream.thenCopyD2D(DeviceA5, DeviceB5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  Stream.thenCopyD2D(DeviceA7, DeviceB5); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopySliceD2DByCount) { -  Stream.thenCopyD2D(DeviceA5.asSlice().slice(1), DeviceB5, 4); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 4; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I + 1), getDeviceValue(DeviceB5, I)); - -  Stream.thenCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5, 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopySliceD2D) { -  Stream.thenCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB5, I)); - -  Stream.thenCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyD2DSliceByCount) { -  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice().slice(2), 5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I + 2)); - -  Stream.thenCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice(), 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopyD2DSlice) { -  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), 
getDeviceValue(DeviceB7, I)); - -  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice()); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopySliceD2DSliceByCount) { -  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  Stream.thenCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 2; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I)); - -  Stream.thenCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7); -  EXPECT_FALSE(Stream.isOK()); -} - -TEST_F(StreamTest, CopySliceD2DSlice) { -  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()); -  EXPECT_TRUE(Stream.isOK()); -  for (int I = 0; I < 5; ++I) -    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I)); - -  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()); -  EXPECT_FALSE(Stream.isOK()); -} - -} // namespace diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/cuda/CUDATest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/cuda/CUDATest.cpp deleted file mode 100644 index caf14368082..00000000000 --- a/parallel-libs/streamexecutor/unittests/CoreTests/cuda/CUDATest.cpp +++ /dev/null @@ -1,215 +0,0 @@ -//===-- CUDATest.cpp - Tests for CUDA platform ----------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the unit tests for CUDA platform code. 
-/// -//===----------------------------------------------------------------------===// - -#include "streamexecutor/StreamExecutor.h" - -#include "gtest/gtest.h" - -namespace { - -namespace compilergen { -using SaxpyKernel = -    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>, -                           streamexecutor::GlobalDeviceMemory<float>>; - -const char *SaxpyPTX = R"( -  .version 4.3 -  .target sm_20 -  .address_size 64 - -  .visible .entry saxpy(.param .f32 A, .param .u64 X, .param .u64 Y) { -    .reg .f32 %AValue; -    .reg .f32 %XValue; -    .reg .f32 %YValue; -    .reg .f32 %Result; - -    .reg .b64 %XBaseAddrGeneric; -    .reg .b64 %YBaseAddrGeneric; -    .reg .b64 %XBaseAddrGlobal; -    .reg .b64 %YBaseAddrGlobal; -    .reg .b64 %XAddr; -    .reg .b64 %YAddr; -    .reg .b64 %ThreadByteOffset; - -    .reg .b32 %TID; - -    ld.param.f32 %AValue, [A]; -    ld.param.u64 %XBaseAddrGeneric, [X]; -    ld.param.u64 %YBaseAddrGeneric, [Y]; -    cvta.to.global.u64 %XBaseAddrGlobal, %XBaseAddrGeneric; -    cvta.to.global.u64 %YBaseAddrGlobal, %YBaseAddrGeneric; -    mov.u32 %TID, %tid.x; -    mul.wide.u32 %ThreadByteOffset, %TID, 4; -    add.s64 %XAddr, %ThreadByteOffset, %XBaseAddrGlobal; -    add.s64 %YAddr, %ThreadByteOffset, %YBaseAddrGlobal; -    ld.global.f32 %XValue, [%XAddr]; -    ld.global.f32 %YValue, [%YAddr]; -    fma.rn.f32 %Result, %AValue, %XValue, %YValue; -    st.global.f32 [%XAddr], %Result; -    ret; -  } -)"; - -static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = []() { -  streamexecutor::MultiKernelLoaderSpec Spec; -  Spec.addCUDAPTXInMemory("saxpy", {{{2, 0}, SaxpyPTX}}); -  return Spec; -}(); - -using SwapPairsKernel = -    streamexecutor::Kernel<streamexecutor::SharedDeviceMemory<int>, -                           streamexecutor::GlobalDeviceMemory<int>, int>; - -const char *SwapPairsPTX = R"( -  .version 4.3 -  .target sm_20 -  .address_size 64 - -  .extern .shared .align 4 .b8 SwapSpace[]; - -  
.visible .entry SwapPairs(.param .u64 InOut, .param .u32 InOutSize) { -    .reg .b64 %InOutGeneric; -    .reg .b32 %InOutSizeValue; - -    .reg .b32 %LocalIndex; -    .reg .b32 %PartnerIndex; -    .reg .b32 %ThreadsPerBlock; -    .reg .b32 %BlockIndex; -    .reg .b32 %GlobalIndex; - -    .reg .b32 %GlobalIndexBound; -    .reg .pred %GlobalIndexTooHigh; - -    .reg .b64 %InOutGlobal; -    .reg .b64 %GlobalByteOffset; -    .reg .b64 %GlobalAddress; - -    .reg .b32 %InitialValue; -    .reg .b32 %SwappedValue; - -    .reg .b64 %SharedBaseAddr; -    .reg .b64 %LocalWriteByteOffset; -    .reg .b64 %LocalReadByteOffset; -    .reg .b64 %SharedWriteAddr; -    .reg .b64 %SharedReadAddr; - -    ld.param.u64 %InOutGeneric, [InOut]; -    ld.param.u32 %InOutSizeValue, [InOutSize]; -    mov.u32 %LocalIndex, %tid.x; -    mov.u32 %ThreadsPerBlock, %ntid.x; -    mov.u32 %BlockIndex, %ctaid.x; -    mad.lo.s32 %GlobalIndex, %ThreadsPerBlock, %BlockIndex, %LocalIndex; -    and.b32 %GlobalIndexBound, %InOutSizeValue, -2; -    setp.ge.s32 %GlobalIndexTooHigh, %GlobalIndex, %GlobalIndexBound; -    @%GlobalIndexTooHigh bra END; - -    cvta.to.global.u64 %InOutGlobal, %InOutGeneric; -    mul.wide.s32 %GlobalByteOffset, %GlobalIndex, 4; -    add.s64 %GlobalAddress, %InOutGlobal, %GlobalByteOffset; -    ld.global.u32 %InitialValue, [%GlobalAddress]; -    mul.wide.s32 %LocalWriteByteOffset, %LocalIndex, 4; -    mov.u64 %SharedBaseAddr, SwapSpace; -    add.s64 %SharedWriteAddr, %SharedBaseAddr, %LocalWriteByteOffset; -    st.shared.u32 [%SharedWriteAddr], %InitialValue; -    bar.sync 0; -    xor.b32 %PartnerIndex, %LocalIndex, 1; -    mul.wide.s32 %LocalReadByteOffset, %PartnerIndex, 4; -    add.s64 %SharedReadAddr, %SharedBaseAddr, %LocalReadByteOffset; -    ld.shared.u32 %SwappedValue, [%SharedReadAddr]; -    st.global.u32 [%GlobalAddress], %SwappedValue; - -  END: -    ret; -  } -)"; - -static streamexecutor::MultiKernelLoaderSpec SwapPairsLoaderSpec = []() { -  
streamexecutor::MultiKernelLoaderSpec Spec; -  Spec.addCUDAPTXInMemory("SwapPairs", {{{2, 0}, SwapPairsPTX}}); -  return Spec; -}(); -} // namespace compilergen - -namespace se = ::streamexecutor; -namespace cg = ::compilergen; - -class CUDATest : public ::testing::Test { -public: -  CUDATest() -      : Platform(getOrDie(se::PlatformManager::getPlatformByName("CUDA"))), -        Device(getOrDie(Platform->getDevice(0))), -        Stream(getOrDie(Device.createStream())) {} - -  se::Platform *Platform; -  se::Device Device; -  se::Stream Stream; -}; - -TEST_F(CUDATest, Saxpy) { -  float A = 42.0f; -  std::vector<float> HostX = {0, 1, 2, 3}; -  std::vector<float> HostY = {4, 5, 6, 7}; -  size_t ArraySize = HostX.size(); - -  cg::SaxpyKernel Kernel = -      getOrDie(Device.createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec)); - -  se::RegisteredHostMemory<float> RegisteredX = -      getOrDie(Device.registerHostMemory<float>(HostX)); -  se::RegisteredHostMemory<float> RegisteredY = -      getOrDie(Device.registerHostMemory<float>(HostY)); - -  se::GlobalDeviceMemory<float> X = -      getOrDie(Device.allocateDeviceMemory<float>(ArraySize)); -  se::GlobalDeviceMemory<float> Y = -      getOrDie(Device.allocateDeviceMemory<float>(ArraySize)); - -  Stream.thenCopyH2D(RegisteredX, X) -      .thenCopyH2D(RegisteredY, Y) -      .thenLaunch(ArraySize, 1, Kernel, A, X, Y) -      .thenCopyD2H(X, RegisteredX); -  se::dieIfError(Stream.blockHostUntilDone()); - -  std::vector<float> ExpectedX = {4, 47, 90, 133}; -  EXPECT_EQ(ExpectedX, HostX); -} - -TEST_F(CUDATest, DynamicSharedMemory) { -  std::vector<int> HostPairs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; -  std::vector<int> HostResult(HostPairs.size(), 0); -  int ArraySize = HostPairs.size(); - -  cg::SwapPairsKernel Kernel = getOrDie( -      Device.createKernel<cg::SwapPairsKernel>(cg::SwapPairsLoaderSpec)); - -  se::RegisteredHostMemory<int> RegisteredPairs = -      getOrDie(Device.registerHostMemory<int>(HostPairs)); -  
se::RegisteredHostMemory<int> RegisteredResult = -      getOrDie(Device.registerHostMemory<int>(HostResult)); - -  se::GlobalDeviceMemory<int> Pairs = -      getOrDie(Device.allocateDeviceMemory<int>(ArraySize)); -  auto SharedMemory = -      se::SharedDeviceMemory<int>::makeFromElementCount(ArraySize); - -  Stream.thenCopyH2D(RegisteredPairs, Pairs) -      .thenLaunch(ArraySize, 1, Kernel, SharedMemory, Pairs, ArraySize) -      .thenCopyD2H(Pairs, RegisteredResult); -  se::dieIfError(Stream.blockHostUntilDone()); - -  std::vector<int> ExpectedPairs = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10}; -  EXPECT_EQ(ExpectedPairs, HostResult); -} - -} // namespace  | 

