[SE] Remove StreamExecutor

Summary: The project has been renamed to Acxxel, so this old directory needs to be deleted. Reviewers: jlebar, jprice Subscribers: beanz, mgorny, parallel_libs-commits, modocache Differential Revision: https://reviews.llvm.org/D25964 llvm-svn: 285115
author: Jason Henline <jhen@google.com> 2016-10-25 20:38:08 +0000
committer: Jason Henline <jhen@google.com> 2016-10-25 20:38:08 +0000
commit: b3f709e10f37225ae65c1d48c4623f6abc2cac1e (patch)
tree: 935df072bbc2ee7873c138dd75f04a32d96c2249 /parallel-libs
parent: 209a77d8d9247b7612025a15f0b4c18bc49e66eb (diff)
download: bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.tar.gz
bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.zip
51 files changed, 0 insertions, 7668 deletions
diff --git a/parallel-libs/CMakeLists.txt b/parallel-libs/CMakeLists.txt
index e96b2e4a479..c1fcf45cd71 100644
--- a/parallel-libs/CMakeLists.txt
+++ b/parallel-libs/CMakeLists.txt
@@ -1,3 +1 @@
 cmake_minimum_required(VERSION 3.1)
-
-add_subdirectory(streamexecutor)
diff --git a/parallel-libs/streamexecutor/CMakeLists.txt b/parallel-libs/streamexecutor/CMakeLists.txt
deleted file mode 100644
index b1862c5e524..00000000000
--- a/parallel-libs/streamexecutor/CMakeLists.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-cmake_minimum_required(VERSION 3.1)
-
-# User-facing build switches; each one is a boolean cache entry.
-option(STREAM_EXECUTOR_UNIT_TESTS "enable unit tests" ON)
-option(STREAM_EXECUTOR_ENABLE_DOXYGEN
-    "enable StreamExecutor doxygen" ON)
-option(STREAM_EXECUTOR_ENABLE_CONFIG_TOOL
-    "enable building streamexecutor-config tool" ON)
-option(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-    "enable building the CUDA StreamExecutor platform \
-(see CMake's 'FindCUDA' documentation for info on specifying the CUDA path)"
-    OFF)
-
-# Generate PlatformOptions.h in the build tree from its .h.in template.
-configure_file("include/streamexecutor/PlatformOptions.h.in"
-    "include/streamexecutor/PlatformOptions.h")
-
-# Prepend the source-tree headers, then the configured headers from the build
-# tree; both calls use BEFORE, so the build-tree directory is searched first.
-include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/include)
-include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/include)
-
-# Build standalone (with LLVM as an external package) exactly when this
-# directory is CMake's top-level source directory; otherwise build in-tree.
-string(COMPARE EQUAL
-    "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}"
-    STREAM_EXECUTOR_STANDALONE)
-
-if(STREAM_EXECUTOR_STANDALONE)
-    project(StreamExecutor)
-
-    find_package(LLVM REQUIRED CONFIG)
-    message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
-    message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
-
-    include_directories(${LLVM_INCLUDE_DIRS})
-    add_definitions(${LLVM_DEFINITIONS})
-
-    # If LLVM does not have RTTI, don't use it here either.
-    if(NOT LLVM_ENABLE_RTTI)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
-    endif()
-
-    # Make LLVM's CMake modules findable so that AddLLVM can be included.
-    set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
-    list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
-    include(AddLLVM)
-
-    # Standalone unit tests need an externally provided GoogleTest and threads.
-    if(STREAM_EXECUTOR_UNIT_TESTS)
-        enable_testing()
-        find_package(GTest REQUIRED)
-        include_directories(${GTEST_INCLUDE_DIRS})
-        find_package(Threads REQUIRED)
-    endif()
-else()
-    # In-tree builds use the GoogleTest headers under the LLVM source tree.
-    if(STREAM_EXECUTOR_UNIT_TESTS)
-        include_directories(
-            "${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include")
-    endif()
-endif()
-
-# Require C++11; do not allow falling back to an older standard.
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-set(CMAKE_CXX_STANDARD 11)
-
-# Append the warning options to whatever flags are already set.
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter")
-
-# Translate the LLVM components used by this project (support, symbolize)
-# into library names, stored in llvm_libs.
-llvm_map_components_to_libnames(llvm_libs support symbolize)
-
-# When the CUDA platform is enabled, require the CUDA toolkit and libcuda.
-if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
-    find_package(CUDA REQUIRED)
-    include_directories(${CUDA_INCLUDE_DIRS})
-    # The driver library is looked up separately, by the name "cuda".
-    find_library(CUDA_DRIVER_LIBRARY cuda)
-    if(NOT CUDA_DRIVER_LIBRARY)
-        message(FATAL_ERROR
-            "could not find libcuda, \
-is the CUDA driver installed on your system?")
-    endif()
-    # These two variables are not read in this file; presumably they are
-    # consumed by the subdirectories this file adds (verify there).
-    set(STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT
-        $<TARGET_OBJECTS:streamexecutor_cuda_platform>)
-    set(STREAM_EXECUTOR_LIBCUDA_LIBRARIES ${CUDA_DRIVER_LIBRARY})
-endif()
-
-# The lib and examples subdirectories are always part of the build.
-add_subdirectory(lib)
-add_subdirectory(examples)
-
-# Optional pieces, each controlled by its own option.
-if(STREAM_EXECUTOR_UNIT_TESTS)
-    add_subdirectory(unittests)
-endif()
-if(STREAM_EXECUTOR_ENABLE_CONFIG_TOOL)
-    add_subdirectory(tools/streamexecutor-config)
-endif()
-
-# Install the contents of the source-tree include/ directory under include.
-install(DIRECTORY include/ DESTINATION include)
-
-# "doc" target: configure the Doxyfile with @VAR@ substitution only and run
-# Doxygen on it from the build directory.
-if(STREAM_EXECUTOR_ENABLE_DOXYGEN)
-    find_package(Doxygen REQUIRED)
-    configure_file(Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
-    add_custom_target(doc
-        ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
-        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-        COMMENT "Generating API documentation with Doxygen"
-        VERBATIM)
-endif()
diff --git a/parallel-libs/streamexecutor/Doxyfile.in b/parallel-libs/streamexecutor/Doxyfile.in
deleted file mode 100644
index 0b237349068..00000000000
--- a/parallel-libs/streamexecutor/Doxyfile.in
+++ /dev/null
@@ -1,2303 +0,0 @@
-# Doxyfile 1.8.6
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project.
-#
-# All text after a double hash (##) is considered a comment and is placed in
-# front of the TAG it is preceding.
-#
-# All text after a single hash (#) is considered a comment and will be ignored.
-# The format is:
-# TAG = value [value, ...]
-# For lists, items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (\" \").
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
-# The default value is: UTF-8.
-
-DOXYFILE_ENCODING      = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
-# double-quotes, unless you are using Doxywizard) that should identify the
-# project for which the documentation is generated. This name is used in the
-# title of most generated pages and in a few other places.
-# The default value is: My Project.
-
-PROJECT_NAME           = "StreamExecutor"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
-# could be handy for archiving the generated documentation or if some version
-# control system is used.
-
-PROJECT_NUMBER         =
-
-# Using the PROJECT_BRIEF tag one can provide an optional one line description
-# for a project that appears at the top of each page and should give viewer a
-# quick idea about the purpose of the project. Keep the description short.
-
-PROJECT_BRIEF          =
-
-# With the PROJECT_LOGO tag one can specify a logo or icon that is included in
-# the documentation. The maximum height of the logo should not exceed 55 pixels
-# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo
-# to the output directory.
-
-PROJECT_LOGO           =
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
-# into which the generated documentation will be written. If a relative path is
-# entered, it will be relative to the location where doxygen was started. If
-# left blank the current directory will be used.
-
-OUTPUT_DIRECTORY       =
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
-# option can be useful when feeding doxygen a huge amount of source files, where
-# putting all generated files in the same directory would otherwise cause
-# performance problems for the file system.
-# The default value is: NO.
-
-CREATE_SUBDIRS         = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
-# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
-# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
-# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
-# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
-# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
-# Ukrainian and Vietnamese.
-# The default value is: English.
-
-OUTPUT_LANGUAGE        = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member
-# descriptions after the members that are listed in the file and class
-# documentation (similar to Javadoc). Set to NO to disable this.
-# The default value is: YES.
-
-BRIEF_MEMBER_DESC      = YES
-
-# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief
-# description of a member or function before the detailed description.
-#
-# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-# The default value is: YES.
-
-REPEAT_BRIEF           = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator that is
-# used to form the text in various listings. Each string in this list, if found
-# as the leading text of the brief description, will be stripped from the text
-# and the result, after processing the whole list, is used as the annotated
-# text. Otherwise, the brief description is used as-is. If left blank, the
-# following values are used ($name is automatically replaced with the name of
-# the entity):The $name class, The $name widget, The $name file, is, provides,
-# specifies, contains, represents, a, an and the.
-
-ABBREVIATE_BRIEF       =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# doxygen will generate a detailed section even if there is only a brief
-# description.
-# The default value is: NO.
-
-ALWAYS_DETAILED_SEC    = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-# The default value is: NO.
-
-INLINE_INHERITED_MEMB  = NO
-
-# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path
-# before each file name in the file list and in the header files. If set to NO
-# the shortest path that makes the file name unique will be used.
-# The default value is: YES.
-
-FULL_PATH_NAMES        = YES
-
-# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
-# Stripping is only done if one of the specified strings matches the left-hand
-# part of the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the path to
-# strip.
-#
-# Note that you can specify absolute paths here, but also relative paths, which
-# will be relative from the directory where doxygen is started.
-# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-
-STRIP_FROM_PATH        =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
-# path mentioned in the documentation of a class, which tells the reader which
-# header file to include in order to use a class. If left blank only the name of
-# the header file containing the class definition is used. Otherwise one should
-# specify the list of include paths that are normally passed to the compiler
-# using the -I flag.
-
-STRIP_FROM_INC_PATH    =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
-# less readable) file names. This can be useful if your file system doesn't
-# support long names like on DOS, Mac, or CD-ROM.
-# The default value is: NO.
-
-SHORT_NAMES            = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
-# first line (until the first dot) of a Javadoc-style comment as the brief
-# description. If set to NO, the Javadoc-style will behave just like regular Qt-
-# style comments (thus requiring an explicit @brief command for a brief
-# description.)
-# The default value is: NO.
-
-JAVADOC_AUTOBRIEF      = Yes
-
-# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
-# line (until the first dot) of a Qt-style comment as the brief description. If
-# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
-# requiring an explicit \brief command for a brief description.)
-# The default value is: NO.
-
-QT_AUTOBRIEF           = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
-# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
-# a brief description. This used to be the default behavior. The new default is
-# to treat a multi-line C++ comment block as a detailed description. Set this
-# tag to YES if you prefer the old behavior instead.
-#
-# Note that setting this tag to YES also means that rational rose comments are
-# not recognized any more.
-# The default value is: NO.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
-# documentation from any documented member that it re-implements.
-# The default value is: YES.
-
-INHERIT_DOCS           = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a
-# new page for each member. If set to NO, the documentation of a member will be
-# part of the file/class/namespace that contains it.
-# The default value is: NO.
-
-SEPARATE_MEMBER_PAGES  = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
-# uses this value to replace tabs by spaces in code fragments.
-# Minimum value: 1, maximum value: 16, default value: 4.
-
-TAB_SIZE               = 4
-
-# This tag can be used to specify a number of aliases that act as commands in
-# the documentation. An alias has the form:
-# name=value
-# For example adding
-# "sideeffect=@par Side Effects:\n"
-# will allow you to put the command \sideeffect (or @sideeffect) in the
-# documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
-
-ALIASES                =
-
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST              =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
-# only. Doxygen will then generate output that is more tailored for C. For
-# instance, some of the names that are used will be different. The list of all
-# members will be omitted, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_FOR_C  = NO
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
-# Python sources only. Doxygen will then generate output that is more tailored
-# for that language. For instance, namespaces will be presented as packages,
-# qualified scopes will look different, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_JAVA   = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources. Doxygen will then generate output that is tailored for Fortran.
-# The default value is: NO.
-
-OPTIMIZE_FOR_FORTRAN   = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for VHDL.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_VHDL   = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given
-# extension. Doxygen has a built-in mapping, but you can override or extend it
-# using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make
-# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
-# (default is Fortran), use: inc=Fortran f=C.
-#
-# Note: For files without extension you can use no_extension as a placeholder.
-#
-# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
-
-EXTENSION_MAPPING      =
-
-# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
-# according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
-# The output of markdown processing is further processed by doxygen, so you can
-# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
-# case of backward compatibility issues.
-# The default value is: YES.
-
-MARKDOWN_SUPPORT       = YES
-
-# When enabled doxygen tries to link words that correspond to documented
-# classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by putting a % sign in front of the word
-# or globally by setting AUTOLINK_SUPPORT to NO.
-# The default value is: YES.
-
-AUTOLINK_SUPPORT       = YES
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should set this
-# tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string);
-# versus func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-# The default value is: NO.
-
-BUILTIN_STL_SUPPORT    = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-# The default value is: NO.
-
-CPP_CLI_SUPPORT        = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
-# will parse them like normal C++ but will assume all classes use public instead
-# of private inheritance when no explicit protection keyword is present.
-# The default value is: NO.
-
-SIP_SUPPORT            = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate
-# getter and setter methods for a property. Setting this option to YES will make
-# doxygen replace the get and set methods with a property in the documentation.
-# This will only work if the methods are indeed getting or setting a simple
-# type. If this is not the case, or you want to show the methods anyway, you
-# should set this option to NO.
-# The default value is: YES.
-
-IDL_PROPERTY_SUPPORT   = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-# The default value is: NO.
-
-DISTRIBUTE_GROUP_DOC   = NO
-
-# Set the SUBGROUPING tag to YES to allow class member groups of the same type
-# (for instance a group of public functions) to be put as a subgroup of that
-# type (e.g. under the Public Functions section). Set it to NO to prevent
-# subgrouping. Alternatively, this can be done per class using the
-# \nosubgrouping command.
-# The default value is: YES.
-
-SUBGROUPING            = YES
-
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
-# are shown inside the group in which they are included (e.g. using \ingroup)
-# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
-# and RTF).
-#
-# Note that this feature does not work in combination with
-# SEPARATE_MEMBER_PAGES.
-# The default value is: NO.
-
-INLINE_GROUPED_CLASSES = NO
-
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
-# with only public data fields or simple typedef fields will be shown inline in
-# the documentation of the scope in which they are defined (i.e. file,
-# namespace, or group documentation), provided this scope is documented. If set
-# to NO, structs, classes, and unions are shown on a separate page (for HTML and
-# Man pages) or section (for LaTeX and RTF).
-# The default value is: NO.
-
-INLINE_SIMPLE_STRUCTS  = NO
-
-# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
-# enum is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically be
-# useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-# The default value is: NO.
-
-TYPEDEF_HIDES_STRUCT   = NO
-
-# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
-# cache is used to resolve symbols given their name and scope. Since this can be
-# an expensive process and often the same symbol appears multiple times in the
-# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
-# doxygen will become slower. If the cache is too large, memory is wasted. The
-# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
-# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
-# symbols. At the end of a run doxygen will report the cache usage and suggest
-# the optimal cache size from a speed point of view.
-# Minimum value: 0, maximum value: 9, default value: 0.
-
-LOOKUP_CACHE_SIZE      = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available. Private
-# class members and static file members will be hidden unless the
-# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
-# Note: This will also disable the warnings about undocumented members that are
-# normally produced when WARNINGS is set to YES.
-# The default value is: NO.
-
-EXTRACT_ALL            = YES
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
-# be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PRIVATE        = NO
-
-# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal
-# scope will be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PACKAGE        = NO
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file will be
-# included in the documentation.
-# The default value is: NO.
-
-EXTRACT_STATIC         = NO
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined
-# locally in source files will be included in the documentation. If set to NO
-# only classes defined in header files are included. Does not have any effect
-# for Java sources.
-# The default value is: YES.
-
-EXTRACT_LOCAL_CLASSES  = YES
-
-# This flag is only useful for Objective-C code. When set to YES local methods,
-# which are defined in the implementation section but not in the interface are
-# included in the documentation. If set to NO only methods in the interface are
-# included.
-# The default value is: NO.
-
-EXTRACT_LOCAL_METHODS  = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base name of
-# the file that contains the anonymous namespace. By default anonymous namespaces
-# are hidden.
-# The default value is: NO.
-
-EXTRACT_ANON_NSPACES   = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
-# undocumented members inside documented classes or files. If set to NO these
-# members will be included in the various overviews, but no documentation
-# section is generated. This option has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_MEMBERS     = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy. If set
-# to NO these classes will be included in the various overviews. This option has
-# no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_CLASSES     = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO these declarations will be
-# included in the documentation.
-# The default value is: NO.
-
-HIDE_FRIEND_COMPOUNDS  = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
-# documentation blocks found inside the body of a function. If set to NO these
-# blocks will be appended to the function's detailed documentation block.
-# The default value is: NO.
-
-HIDE_IN_BODY_DOCS      = NO
-
-# The INTERNAL_DOCS tag determines if documentation that is typed after a
-# \internal command is included. If the tag is set to NO then the documentation
-# will be excluded. Set it to YES to include the internal documentation.
-# The default value is: NO.
-
-INTERNAL_DOCS          = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
-
-CASE_SENSE_NAMES       = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
-# their full class and namespace scopes in the documentation. If set to YES the
-# scope will be hidden.
-# The default value is: NO.
-
-HIDE_SCOPE_NAMES       = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
-# the files that are included by a file in the documentation of that file.
-# The default value is: YES.
-
-SHOW_INCLUDE_FILES     = YES
-
-# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
-# grouped member an include statement to the documentation, telling the reader
-# which file to include in order to use the member.
-# The default value is: NO.
-
-SHOW_GROUPED_MEMB_INC  = NO
-
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
-# files with double quotes in the documentation rather than with sharp brackets.
-# The default value is: NO.
-
-FORCE_LOCAL_INCLUDES   = NO
-
-# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
-# documentation for inline members.
-# The default value is: YES.
-
-INLINE_INFO            = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
-# (detailed) documentation of file and class members alphabetically by member
-# name. If set to NO the members will appear in declaration order.
-# The default value is: YES.
-
-SORT_MEMBER_DOCS       = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
-# descriptions of file, namespace and class members alphabetically by member
-# name. If set to NO the members will appear in declaration order. Note that
-# this will also influence the order of the classes in the class list.
-# The default value is: NO.
-
-SORT_BRIEF_DOCS        = NO
-
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
-# (brief and detailed) documentation of class members so that constructors and
-# destructors are listed first. If set to NO the constructors will appear in the
-# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
-# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
-# member documentation.
-# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
-# detailed member documentation.
-# The default value is: NO.
-
-SORT_MEMBERS_CTORS_1ST = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
-# of group names into alphabetical order. If set to NO the group names will
-# appear in their defined order.
-# The default value is: NO.
-
-SORT_GROUP_NAMES       = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
-# fully-qualified names, including namespaces. If set to NO, the class list will
-# be sorted only by class name, not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the alphabetical
-# list.
-# The default value is: NO.
-
-SORT_BY_SCOPE_NAME     = NO
-
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
-# type resolution of all parameters of a function it will reject a match between
-# the prototype and the implementation of a member function even if there is
-# only one candidate or it is obvious which candidate to choose by doing a
-# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
-# accept a match between prototype and implementation in such cases.
-# The default value is: NO.
-
-STRICT_PROTO_MATCHING  = NO
-
-# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the
-# todo list. This list is created by putting \todo commands in the
-# documentation.
-# The default value is: YES.
-
-GENERATE_TODOLIST      = YES
-
-# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the
-# test list. This list is created by putting \test commands in the
-# documentation.
-# The default value is: YES.
-
-GENERATE_TESTLIST      = YES
-
-# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug
-# list. This list is created by putting \bug commands in the documentation.
-# The default value is: YES.
-
-GENERATE_BUGLIST       = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO)
-# the deprecated list. This list is created by putting \deprecated commands in
-# the documentation.
-# The default value is: YES.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional documentation
-# sections, marked by \if <section_label> ... \endif and \cond <section_label>
-# ... \endcond blocks.
-
-ENABLED_SECTIONS       =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
-# initial value of a variable or macro / define can have for it to appear in the
-# documentation. If the initializer consists of more lines than specified here
-# it will be hidden. Use a value of 0 to hide initializers completely. The
-# appearance of the value of individual variables and macros / defines can be
-# controlled using \showinitializer or \hideinitializer command in the
-# documentation regardless of this setting.
-# Minimum value: 0, maximum value: 10000, default value: 30.
-
-MAX_INITIALIZER_LINES  = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
-# the bottom of the documentation of classes and structs. If set to YES the list
-# will mention the files that were used to generate the documentation.
-# The default value is: YES.
-
-SHOW_USED_FILES        = YES
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
-# will remove the Files entry from the Quick Index and from the Folder Tree View
-# (if specified).
-# The default value is: YES.
-
-SHOW_FILES             = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
-# page. This will remove the Namespaces entry from the Quick Index and from the
-# Folder Tree View (if specified).
-# The default value is: YES.
-
-SHOW_NAMESPACES        = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command command input-file, where command is the value of the
-# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
-# by doxygen. Whatever the program writes to standard output is used as the file
-# version. For an example see the documentation.
-
-FILE_VERSION_FILTER    =
-
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
-# by doxygen. The layout file controls the global structure of the generated
-# output files in an output format independent way. To create the layout file
-# that represents doxygen's defaults, run doxygen with the -l option. You can
-# optionally specify a file name after the option, if omitted DoxygenLayout.xml
-# will be used as the name of the layout file.
-#
-# Note that if you run doxygen from a directory containing a file called
-# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
-# tag is left empty.
-
-LAYOUT_FILE            =
-
-# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
-# the reference definitions. This must be a list of .bib files. The .bib
-# extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
-# For LaTeX the style of the bibliography can be controlled using
-# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
-# search path. Do not use file names with spaces, bibtex cannot handle them. See
-# also \cite for info how to create references.
-
-CITE_BIB_FILES         =
-
-#---------------------------------------------------------------------------
-# Configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated to
-# standard output by doxygen. If QUIET is set to YES this implies that the
-# messages are off.
-# The default value is: NO.
-
-QUIET                  = NO
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
-# this implies that the warnings are on.
-#
-# Tip: Turn warnings on while writing the documentation.
-# The default value is: YES.
-
-WARNINGS               = YES
-
-# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate
-# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
-# will automatically be disabled.
-# The default value is: YES.
-
-WARN_IF_UNDOCUMENTED   = YES
-
-# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some parameters
-# in a documented function, or documenting parameters that don't exist or using
-# markup commands wrongly.
-# The default value is: YES.
-
-WARN_IF_DOC_ERROR      = YES
-
-# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
-# are documented, but have no documentation for their parameters or return
-# value. If set to NO doxygen will only warn about wrong or incomplete parameter
-# documentation, but not about the absence of documentation.
-# The default value is: NO.
-
-WARN_NO_PARAMDOC       = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that doxygen
-# can produce. The string should contain the $file, $line, and $text tags, which
-# will be replaced by the file and line number from which the warning originated
-# and the warning text. Optionally the format may contain $version, which will
-# be replaced by the version of the file (if it could be obtained via
-# FILE_VERSION_FILTER)
-# The default value is: $file:$line: $text.
-
-WARN_FORMAT            = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning and error
-# messages should be written. If left blank the output is written to standard
-# error (stderr).
-
-WARN_LOGFILE           =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag is used to specify the files and/or directories that contain
-# documented source files. You may enter file names like myfile.cpp or
-# directories like /usr/src/myproject. Separate the files or directories with
-# spaces.
-# Note: If this tag is empty the current directory is searched.
-
-INPUT                  = @CMAKE_CURRENT_SOURCE_DIR@
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
-# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: http://www.gnu.org/software/libiconv) for the list of
-# possible encodings.
-# The default value is: UTF-8.
-
-INPUT_ENCODING         = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank the
-# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii,
-# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp,
-# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown,
-# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf,
-# *.qsf, *.as and *.js.
-
-FILE_PATTERNS          =
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories should
-# be searched for input files as well.
-# The default value is: NO.
-
-RECURSIVE              = YES
-
-# The EXCLUDE tag can be used to specify files and/or directories that should be
-# excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-#
-# Note that relative paths are relative to the directory from which doxygen is
-# run.
-
-EXCLUDE                =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix file system feature) are excluded
-# from the input.
-# The default value is: NO.
-
-EXCLUDE_SYMLINKS       = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories.
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories for example use the pattern */test/*
-
-EXCLUDE_PATTERNS       = */examples/* */tools/* */unittests/*
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories use the pattern */test/*
-
-EXCLUDE_SYMBOLS        =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or directories
-# that contain example code fragments that are included (see the \include
-# command).
-
-EXAMPLE_PATH           = @CMAKE_CURRENT_SOURCE_DIR@/examples
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank all
-# files are included.
-
-EXAMPLE_PATTERNS       =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude commands
-# irrespective of the value of the RECURSIVE tag.
-# The default value is: NO.
-
-EXAMPLE_RECURSIVE      = YES
-
-# The IMAGE_PATH tag can be used to specify one or more files or directories
-# that contain images that are to be included in the documentation (see the
-# \image command).
-
-IMAGE_PATH             =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command:
-#
-# <filter> <input-file>
-#
-# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
-# name of an input file. Doxygen will then use the output that the filter
-# program writes to standard output. If FILTER_PATTERNS is specified, this tag
-# will be ignored.
-#
-# Note that the filter must not add or remove lines; it is applied before the
-# code is scanned, but not when the output code is generated. If lines are added
-# or removed, the anchors will not be placed correctly.
-
-INPUT_FILTER           =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form: pattern=filter
-# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
-# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
-# patterns match the file name, INPUT_FILTER is applied.
-
-FILTER_PATTERNS        =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will also be used to filter the input files that are used for
-# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
-# The default value is: NO.
-
-FILTER_SOURCE_FILES    = NO
-
-# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
-# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
-# it is also possible to disable source filtering for a specific pattern using
-# *.ext= (so without naming a filter).
-# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
-
-FILTER_SOURCE_PATTERNS =
-
-# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
-# is part of the input, its contents will be placed on the main page
-# (index.html). This can be useful if you have a project on for instance GitHub
-# and want to reuse the introduction page also for the doxygen output.
-
-USE_MDFILE_AS_MAINPAGE =
-
-#---------------------------------------------------------------------------
-# Configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
-# generated. Documented entities will be cross-referenced with these sources.
-#
-# Note: To get rid of all source code in the generated output, make sure that
-# also VERBATIM_HEADERS is set to NO.
-# The default value is: NO.
-
-SOURCE_BROWSER         = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body of functions,
-# classes and enums directly into the documentation.
-# The default value is: NO.
-
-INLINE_SOURCES         = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
-# special comment blocks from generated source code fragments. Normal C, C++ and
-# Fortran comments will always remain visible.
-# The default value is: YES.
-
-STRIP_CODE_COMMENTS    = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# function all documented functions referencing it will be listed.
-# The default value is: NO.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES then for each documented function
-# all documented entities called/used by that function will be listed.
-# The default value is: NO.
-
-REFERENCES_RELATION    = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
-# to YES, then the hyperlinks from functions in REFERENCES_RELATION and
-# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
-# link to the documentation.
-# The default value is: YES.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
-# source code will show a tooltip with additional information such as prototype,
-# brief description and links to the definition and documentation. Since this
-# will make the HTML file larger and loading of large files a bit slower, you
-# can opt to disable this feature.
-# The default value is: YES.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-SOURCE_TOOLTIPS        = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code will
-# point to the HTML generated by the htags(1) tool instead of doxygen built-in
-# source browser. The htags tool is part of GNU's global source tagging system
-# (see http://www.gnu.org/software/global/global.html). You will need version
-# 4.8.6 or higher.
-#
-# To use it do the following:
-# - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
-# - Make sure the INPUT points to the root of the source tree
-# - Run doxygen as normal
-#
-# Doxygen will invoke htags (and that will in turn invoke gtags), so these
-# tools must be available from the command line (i.e. in the search path).
-#
-# The result: instead of the source browser generated by doxygen, the links to
-# source code will now point to the output of htags.
-# The default value is: NO.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-USE_HTAGS              = NO
-
-# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
-# verbatim copy of the header file for each class for which an include is
-# specified. Set to NO to disable this.
-# See also: Section \class.
-# The default value is: YES.
-
-VERBATIM_HEADERS       = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
-# compounds will be generated. Enable this if the project contains a lot of
-# classes, structs, unions or interfaces.
-# The default value is: YES.
-
-ALPHABETICAL_INDEX     = YES
-
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX    = 1
-
-# In case all classes in a project start with a common prefix, all classes will
-# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
-# can be used to specify a prefix (or a list of prefixes) that should be ignored
-# while generating the index headers.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-IGNORE_PREFIX          =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output.
-# The default value is: YES.
-
-GENERATE_HTML          = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_OUTPUT            = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
-# generated HTML page (for example: .htm, .php, .asp).
-# The default value is: .html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FILE_EXTENSION    = .html
-
-# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
-# each generated HTML page. If the tag is left blank doxygen will generate a
-# standard header.
-#
-# To get valid HTML, the header file must include any scripts and style sheets
-# that doxygen needs, which depend on the configuration options used (e.g.
-# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
-# default header using
-# doxygen -w html new_header.html new_footer.html new_stylesheet.css
-# YourConfigFile
-# and then modify the file new_header.html. See also section "Doxygen usage"
-# for information on how to generate the default header that doxygen normally
-# uses.
-# Note: The header is subject to change so you typically have to regenerate the
-# default header when upgrading to a newer version of doxygen. For a description
-# of the possible markers and block names see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_HEADER            =
-
-# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
-# generated HTML page. If the tag is left blank doxygen will generate a standard
-# footer. See HTML_HEADER for more information on how to generate a default
-# footer and what special commands can be used inside the footer. See also
-# section "Doxygen usage" for information on how to generate the default footer
-# that doxygen normally uses.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FOOTER            =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
-# sheet that is used by each HTML page. It can be used to fine-tune the look of
-# the HTML output. If left blank doxygen will generate a default style sheet.
-# See also section "Doxygen usage" for information on how to generate the style
-# sheet that doxygen normally uses.
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
-# it is more robust and this tag (HTML_STYLESHEET) will in the future become
-# obsolete.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_STYLESHEET        =
-
-# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user-
-# defined cascading style sheet that is included after the standard style sheets
-# created by doxygen. Using this option one can overrule certain style aspects.
-# This is preferred over using HTML_STYLESHEET since it does not replace the
-# standard style sheet and is therefore more robust against future updates.
-# Doxygen will copy the style sheet file to the output directory. For an example
-# see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_STYLESHEET  = @CMAKE_CURRENT_SOURCE_DIR@/customdoxygen.css
-
-# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the HTML output directory. Note
-# that these files will be copied to the base HTML output directory. Use the
-# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
-# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
-# files will be copied as-is; there are no commands or markers available.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_FILES       =
-
-# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
-# will adjust the colors in the stylesheet and background images according to
-# this color. Hue is specified as an angle on a colorwheel, see
-# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
-# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
-# purple, and 360 is red again.
-# Minimum value: 0, maximum value: 359, default value: 220.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_HUE    = 220
-
-# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
-# in the HTML output. For a value of 0 the output will use grayscales only. A
-# value of 255 will produce the most vivid colors.
-# Minimum value: 0, maximum value: 255, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_SAT    = 100
-
-# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
-# luminance component of the colors in the HTML output. Values below 100
-# gradually make the output lighter, whereas values above 100 make the output
-# darker. The value divided by 100 is the actual gamma applied, so 80 represents
-# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
-# change the gamma.
-# Minimum value: 40, maximum value: 240, default value: 80.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_GAMMA  = 80
-
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to NO can help when comparing the output of multiple runs.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP         = YES
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_DYNAMIC_SECTIONS  = NO
-
-# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
-# shown in the various tree structured indices initially; the user can expand
-# and collapse entries dynamically later on. Doxygen will expand the tree to
-# such a level that at most the specified number of entries are visible (unless
-# a fully collapsed tree already exceeds this amount). So setting the number of
-# entries to 1 will produce a fully collapsed tree by default. 0 is a special
-# value representing an infinite number of entries and will result in a fully
-# expanded tree by default.
-# Minimum value: 0, maximum value: 9999, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_INDEX_NUM_ENTRIES = 100
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files will be
-# generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: http://developer.apple.com/tools/xcode/), introduced with
-# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_DOCSET        = NO
-
-# This tag determines the name of the docset feed. A documentation feed provides
-# an umbrella under which multiple documentation sets from a single provider
-# (such as a company or product suite) can be grouped.
-# The default value is: Doxygen generated docs.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_FEEDNAME        = "Doxygen generated docs"
-
-# This tag specifies a string that should uniquely identify the documentation
-# set bundle. This should be a reverse domain-name style string, e.g.
-# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_BUNDLE_ID       = org.doxygen.Project
-
-# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
-# the documentation publisher. This should be a reverse domain-name style
-# string, e.g. com.mycompany.MyDocSet.documentation.
-# The default value is: org.doxygen.Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_ID    = org.doxygen.Publisher
-
-# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
-# The default value is: Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_NAME  = Publisher
-
-# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
-# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
-# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
-#
-# The HTML Help Workshop contains a compiler that can convert all HTML output
-# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
-# files are now used as the Windows 98 help format, and will replace the old
-# Windows help format (.hlp) on all Windows platforms in the future. Compressed
-# HTML files also contain an index, a table of contents, and you can search for
-# words in the documentation. The HTML workshop also contains a viewer for
-# compressed HTML files.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_HTMLHELP      = NO
-
-# The CHM_FILE tag can be used to specify the file name of the resulting .chm
-# file. You can add a path in front of the file if the result should not be
-# written to the html output directory.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_FILE               =
-
-# The HHC_LOCATION tag can be used to specify the location (absolute path
-# including file name) of the HTML help compiler (hhc.exe). If non-empty
-# doxygen will try to run the HTML help compiler on the generated index.hhp.
-# The file has to be specified with full path.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-HHC_LOCATION           =
-
-# The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the master .chm file (NO).
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-GENERATE_CHI           = NO
-
-# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
-# and project file content.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_INDEX_ENCODING     =
-
-# The BINARY_TOC flag controls whether a binary table of contents is generated
-# (YES) or a normal table of contents (NO) in the .chm file.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-BINARY_TOC             = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members to
-# the table of contents of the HTML help documentation and to the tree view.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-TOC_EXPAND             = NO
-
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
-# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
-# (.qch) of the generated HTML documentation.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_QHP           = NO
-
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
-# the file name of the resulting .qch file. The path specified is relative to
-# the HTML output folder.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QCH_FILE               =
-
-# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
-# Project output. For more information please see Qt Help Project / Namespace
-# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_NAMESPACE          = org.doxygen.Project
-
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
-# Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
-# folders).
-# The default value is: doc.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_VIRTUAL_FOLDER     = doc
-
-# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
-# filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_NAME   =
-
-# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
-# custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_ATTRS  =
-
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
-# project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_SECT_FILTER_ATTRS  =
-
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHG_LOCATION           =
-
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
-# generated, together with the HTML files, they form an Eclipse help plugin. To
-# install this plugin and make it available under the help contents menu in
-# Eclipse, the contents of the directory containing the HTML and XML files needs
-# to be copied into the plugins directory of eclipse. The name of the directory
-# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
-# After copying Eclipse needs to be restarted before the help appears.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_ECLIPSEHELP   = NO
-
-# A unique identifier for the Eclipse help plugin. When installing the plugin
-# the directory name containing the HTML and XML files should also have this
-# name. Each documentation set should have its own identifier.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
-
-ECLIPSE_DOC_ID         = org.doxygen.Project
-
-# If you want full control over the layout of the generated HTML pages it might
-# be necessary to disable the index and replace it with your own. The
-# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
-# of each HTML page. A value of NO enables the index and the value YES disables
-# it. Since the tabs in the index contain the same information as the navigation
-# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-DISABLE_INDEX          = NO
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information. If the tag
-# value is set to YES, a side panel will be generated containing a tree-like
-# index structure (just like the one that is generated for HTML Help). For this
-# to work a browser that supports JavaScript, DHTML, CSS and frames is required
-# (i.e. any modern browser). Windows users are probably better off using the
-# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can
-# further fine-tune the look of the index. As an example, the default style
-# sheet generated by doxygen has an example that shows how to put an image at
-# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
-# the same information as the tab index, you could consider setting
-# DISABLE_INDEX to YES when enabling this option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_TREEVIEW      = YES
-
-# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
-# doxygen will group on one line in the generated HTML documentation.
-#
-# Note that a value of 0 will completely suppress the enum values from appearing
-# in the overview section.
-# Minimum value: 0, maximum value: 20, default value: 4.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-ENUM_VALUES_PER_LINE   = 4
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
-# to set the initial width (in pixels) of the frame in which the tree is shown.
-# Minimum value: 0, maximum value: 1500, default value: 250.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-TREEVIEW_WIDTH         = 250
-
-# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to
-# external symbols imported via tag files in a separate window.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-EXT_LINKS_IN_WINDOW    = NO
-
-# Use this tag to change the font size of LaTeX formulas included as images in
-# the HTML documentation. When you change the font size after a successful
-# doxygen run you need to manually remove any form_*.png images from the HTML
-# output directory to force them to be regenerated.
-# Minimum value: 8, maximum value: 50, default value: 10.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_FONTSIZE       = 10
-
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes have effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_TRANSPARENT    = YES
-
-# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# http://www.mathjax.org) which uses client side Javascript for the rendering
-# instead of using prerendered bitmaps. Use this if you do not have LaTeX
-# installed or if you want formulas to look prettier in the HTML output. When
-# enabled you may also need to install MathJax separately and configure the path
-# to it using the MATHJAX_RELPATH option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-USE_MATHJAX            = NO
-
-# When MathJax is enabled you can set the default output format to be used for
-# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
-# Possible values are: HTML-CSS (which is slower, but has the best
-# compatibility), NativeMML (i.e. MathML) and SVG.
-# The default value is: HTML-CSS.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_FORMAT         = HTML-CSS
-
-# When MathJax is enabled you need to specify the location relative to the HTML
-# output directory using the MATHJAX_RELPATH option. The destination directory
-# should contain the MathJax.js script. For instance, if the mathjax directory
-# is located at the same level as the HTML output directory, then
-# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
-# Content Delivery Network so you can quickly see the result without installing
-# MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from http://www.mathjax.org before deployment.
-# The default value is: http://cdn.mathjax.org/mathjax/latest.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_RELPATH        = http://cdn.mathjax.org/mathjax/latest
-
-# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
-# extension names that should be enabled during MathJax rendering. For example
-# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_EXTENSIONS     =
-
-# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
-# of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
-# example see the documentation.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_CODEFILE       =
-
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
-# the HTML output. The underlying search engine uses javascript and DHTML and
-# should work on any modern browser. Note that when using HTML help
-# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
-# there is already a search function so this one should typically be disabled.
-# For large projects the javascript based search engine can be slow, then
-# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
-# search using the keyboard; to jump to the search box use <access key> + S
-# (what the <access key> is depends on the OS and browser, but it is typically
-# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
-# key> to jump into the search results window, the results can be navigated
-# using the <cursor keys>. Press <Enter> to select an item or <escape> to cancel
-# the search. The filter options can be selected when the cursor is inside the
-# search box by pressing <Shift>+<cursor down>. Also here use the <cursor keys>
-# to select a filter and <Enter> or <escape> to activate or cancel the filter
-# option.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-SEARCHENGINE           = YES
-
-# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
-# implemented using a web server instead of a web client using Javascript. There
-# are two flavours of web server based searching depending on the
-# EXTERNAL_SEARCH setting. When disabled, doxygen will generate a PHP script for
-# searching and an index file used by the script. When EXTERNAL_SEARCH is
-# enabled the indexing and searching needs to be provided by external tools. See
-# the section "External Indexing and Searching" for details.
-# The default value is: NO.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SERVER_BASED_SEARCH    = NO
-
-# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
-# script for searching. Instead the search results are written to an XML file
-# which needs to be processed by an external indexer. Doxygen will invoke an
-# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
-# search results.
-#
-# Doxygen ships with an example indexer (doxyindexer) and search engine
-# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/).
-#
-# See the section "External Indexing and Searching" for details.
-# The default value is: NO.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTERNAL_SEARCH        = NO
-
-# The SEARCHENGINE_URL should point to a search engine hosted by a web server
-# which will return the search results when EXTERNAL_SEARCH is enabled.
-#
-# Doxygen ships with an example indexer (doxyindexer) and search engine
-# (doxysearch.cgi) which are based on the open source search engine library
-# Xapian (see: http://xapian.org/). See the section "External Indexing and
-# Searching" for details.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SEARCHENGINE_URL       =
-
-# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
-# search data is written to a file for indexing by an external tool. With the
-# SEARCHDATA_FILE tag the name of this file can be specified.
-# The default file is: searchdata.xml.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-SEARCHDATA_FILE        = searchdata.xml
-
-# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
-# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
-# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
-# projects and redirect the results back to the right project.
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTERNAL_SEARCH_ID     =
-
-# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
-# projects other than the one defined by this configuration file, but that are
-# all added to the same external search index. Each project needs to have a
-# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id
-# to a relative location where the documentation can be found. The format is:
-# EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
-# This tag requires that the tag SEARCHENGINE is set to YES.
-
-EXTRA_SEARCH_MAPPINGS  =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES doxygen will generate LaTeX output.
-# The default value is: YES.
-
-GENERATE_LATEX         = YES
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: latex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_OUTPUT           = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked.
-#
-# Note that when enabling USE_PDFLATEX this option is only used for generating
-# bitmaps for formulas in the HTML output, but not in the Makefile that is
-# written to the output directory.
-# The default file is: latex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_CMD_NAME         = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
-# index for LaTeX.
-# The default file is: makeindex.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-MAKEINDEX_CMD_NAME     = makeindex
-
-# If the COMPACT_LATEX tag is set to YES doxygen generates more compact LaTeX
-# documents. This may be useful for small projects and may help to save some
-# trees in general.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-COMPACT_LATEX          = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used by the
-# printer.
-# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
-# 14 inches) and executive (7.25 x 10.5 inches).
-# The default value is: a4.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-PAPER_TYPE             = a4
-
-# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
-# that should be included in the LaTeX output. To get the times font for
-# instance you can specify
-# EXTRA_PACKAGES=times
-# If left blank no extra packages will be included.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-EXTRA_PACKAGES         =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
-# generated LaTeX document. The header should contain everything until the first
-# chapter. If it is left blank doxygen will generate a standard header. See
-# section "Doxygen usage" for information on how to let doxygen write the
-# default header to a separate file.
-#
-# Note: Only use a user-defined header if you know what you are doing! The
-# following commands have a special meaning inside the header: $title,
-# $datetime, $date, $doxygenversion, $projectname, $projectnumber. Doxygen will
-# replace them by respectively the title of the page, the current date and time,
-# only the current date, the version number of doxygen, the project name (see
-# PROJECT_NAME), or the project number (see PROJECT_NUMBER).
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_HEADER           =
-
-# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
-# generated LaTeX document. The footer should contain everything after the last
-# chapter. If it is left blank doxygen will generate a standard footer.
-#
-# Note: Only use a user-defined footer if you know what you are doing!
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_FOOTER           =
-
-# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the LATEX_OUTPUT output
-# directory. Note that the files will be copied as-is; there are no commands or
-# markers available.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_EXTRA_FILES      =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
-# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
-# contain links (just like the HTML output) instead of page references. This
-# makes the output suitable for online browsing using a PDF viewer.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-PDF_HYPERLINKS         = YES
-
-# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
-# the PDF file directly from the LaTeX files. Set this option to YES to get a
-# higher quality PDF documentation.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-USE_PDFLATEX           = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep running
-# if errors occur, instead of asking the user for help. This option is also used
-# when generating formulas in HTML.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_BATCHMODE        = NO
-
-# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
-# index chapters (such as File Index, Compound Index, etc.) in the output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_HIDE_INDICES     = NO
-
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE      = NO
-
-# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
-# bibliography, e.g. plainnat, or ieeetr. See
-# http://en.wikipedia.org/wiki/BibTeX and \cite for more info.
-# The default value is: plain.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_BIB_STYLE        = plain
-
-#---------------------------------------------------------------------------
-# Configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES doxygen will generate RTF output. The
-# RTF output is optimized for Word 97 and may not look too pretty with other RTF
-# readers/editors.
-# The default value is: NO.
-
-GENERATE_RTF           = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: rtf.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_OUTPUT             = rtf
-
-# If the COMPACT_RTF tag is set to YES doxygen generates more compact RTF
-# documents. This may be useful for small projects and may help to save some
-# trees in general.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-COMPACT_RTF            = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
-# contain hyperlink fields. The RTF file will contain links (just like the HTML
-# output) instead of page references. This makes the output suitable for online
-# browsing using Word or some other Word compatible readers that support those
-# fields.
-#
-# Note: WordPad (write) and others do not support links.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_HYPERLINKS         = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's config
-# file, i.e. a series of assignments. You only have to provide replacements,
-# missing definitions are set to their default value.
-#
-# See also section "Doxygen usage" for information on how to generate the
-# default style sheet that doxygen normally uses.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_STYLESHEET_FILE    =
-
-# Set optional variables used in the generation of an RTF document. Syntax is
-# similar to doxygen's config file. A template extensions file can be generated
-# using doxygen -e rtf extensionFile.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_EXTENSIONS_FILE    =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES doxygen will generate man pages for
-# classes and files.
-# The default value is: NO.
-
-GENERATE_MAN           = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it. A directory man3 will be created inside the directory specified by
-# MAN_OUTPUT.
-# The default directory is: man.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_OUTPUT             = man
-
-# The MAN_EXTENSION tag determines the extension that is added to the generated
-# man pages. In case the manual section does not start with a number, the number
-# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
-# optional.
-# The default value is: .3.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_EXTENSION          = .3
-
-# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
-# will generate one additional man file for each entity documented in the real
-# man page(s). These additional files only source the real man page, but without
-# them the man command would be unable to find the correct page.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_MAN is set to YES.
-
-MAN_LINKS              = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES doxygen will generate an XML file that
-# captures the structure of the code including all documentation.
-# The default value is: NO.
-
-GENERATE_XML           = NO
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: xml.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_OUTPUT             = xml
-
-# The XML_SCHEMA tag can be used to specify a XML schema, which can be used by a
-# validating XML parser to check the syntax of the XML files.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_SCHEMA             =
-
-# The XML_DTD tag can be used to specify a XML DTD, which can be used by a
-# validating XML parser to check the syntax of the XML files.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_DTD                =
-
-# If the XML_PROGRAMLISTING tag is set to YES doxygen will dump the program
-# listings (including syntax highlighting and cross-referencing information) to
-# the XML output. Note that enabling this will significantly increase the size
-# of the XML output.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_XML is set to YES.
-
-XML_PROGRAMLISTING     = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to the DOCBOOK output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_DOCBOOK tag is set to YES doxygen will generate Docbook files
-# that can be used to generate PDF.
-# The default value is: NO.
-
-GENERATE_DOCBOOK       = NO
-
-# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
-# front of it.
-# The default directory is: docbook.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_OUTPUT         = docbook
-
-#---------------------------------------------------------------------------
-# Configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES doxygen will generate an AutoGen
-# Definitions (see http://autogen.sf.net) file that captures the structure of
-# the code including all documentation. Note that this feature is still
-# experimental and incomplete at the moment.
-# The default value is: NO.
-
-GENERATE_AUTOGEN_DEF   = NO
-
-#---------------------------------------------------------------------------
-# Configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES doxygen will generate a Perl module
-# file that captures the structure of the code including all documentation.
-#
-# Note that this feature is still experimental and incomplete at the moment.
-# The default value is: NO.
-
-GENERATE_PERLMOD       = NO
-
-# If the PERLMOD_LATEX tag is set to YES doxygen will generate the necessary
-# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
-# output from the Perl module output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_LATEX          = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be nicely
-# formatted so it can be parsed by a human reader. This is useful if you want to
-# understand what is going on. On the other hand, if this tag is set to NO the
-# size of the Perl module output will be much smaller and Perl will parse it
-# just the same.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_PRETTY         = YES
-
-# The names of the make variables in the generated doxyrules.make file are
-# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
-# so different doxyrules.make files included by the same Makefile don't
-# overwrite each other's variables.
-# This tag requires that the tag GENERATE_PERLMOD is set to YES.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES doxygen will evaluate all
-# C-preprocessor directives found in the sources and include files.
-# The default value is: YES.
-
-ENABLE_PREPROCESSING   = YES
-
-# If the MACRO_EXPANSION tag is set to YES doxygen will expand all macro names
-# in the source code. If set to NO only conditional compilation will be
-# performed. Macro expansion can be done in a controlled way by setting
-# EXPAND_ONLY_PREDEF to YES.
-# The default value is: NO.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-MACRO_EXPANSION        = NO
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
-# the macro expansion is limited to the macros specified with the PREDEFINED and
-# EXPAND_AS_DEFINED tags.
-# The default value is: NO.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-EXPAND_ONLY_PREDEF     = NO
-
-# If the SEARCH_INCLUDES tag is set to YES the includes files in the
-# INCLUDE_PATH will be searched if a #include is found.
-# The default value is: YES.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-SEARCH_INCLUDES        = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by the
-# preprocessor.
-# This tag requires that the tag SEARCH_INCLUDES is set to YES.
-
-INCLUDE_PATH           =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will be
-# used.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-INCLUDE_FILE_PATTERNS  =
-
-# The PREDEFINED tag can be used to specify one or more macro names that are
-# defined before the preprocessor is started (similar to the -D option of e.g.
-# gcc). The argument of the tag is a list of macros of the form: name or
-# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
-# is assumed. To prevent a macro definition from being undefined via #undef or
-# recursively expanded use the := operator instead of the = operator.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-PREDEFINED             =
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
-# tag can be used to specify a list of macro names that should be expanded. The
-# macro definition that is found in the sources will be used. Use the PREDEFINED
-# tag if you want to use a different macro definition that overrules the
-# definition found in the source code.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-EXPAND_AS_DEFINED      =
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
-# remove all references to function-like macros that are alone on a line, have an
-# all uppercase name, and do not end with a semicolon. Such function macros are
-# typically used for boiler-plate code, and will confuse the parser if not
-# removed.
-# The default value is: YES.
-# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-
-SKIP_FUNCTION_MACROS   = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES tag can be used to specify one or more tag files. For each tag
-# file the location of the external documentation should be added. The format of
-# a tag file without this location is as follows:
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where loc1 and loc2 can be relative or absolute paths or URLs. See the
-# section "Linking to external documentation" for more information about the use
-# of tag files.
-# Note: Each tag file must have a unique name (where the name does NOT include
-# the path). If a tag file is not located in the directory in which doxygen is
-# run, you must also specify the path to the tagfile here.
-
-TAGFILES               =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
-# tag file that is based on the input files it reads. See section "Linking to
-# external documentation" for more information about the usage of tag files.
-
-GENERATE_TAGFILE       =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed in
-# the class index. If set to NO only the inherited external classes will be
-# listed.
-# The default value is: NO.
-
-ALLEXTERNALS           = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed in
-# the modules index. If set to NO, only the current project's groups will be
-# listed.
-# The default value is: YES.
-
-EXTERNAL_GROUPS        = YES
-
-# If the EXTERNAL_PAGES tag is set to YES all external pages will be listed in
-# the related pages index. If set to NO, only the current project's pages will
-# be listed.
-# The default value is: YES.
-
-EXTERNAL_PAGES         = YES
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of 'which perl').
-# The default file (with absolute path) is: /usr/bin/perl.
-
-PERL_PATH              = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS         = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see:
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH            =
-
-# You can include diagrams made with dia in doxygen documentation. Doxygen will
-# then run dia to produce the diagram and insert it in the documentation. The
-# DIA_PATH tag allows you to specify the directory where the dia binary resides.
-# If left empty dia is assumed to be found in the default search path.
-
-DIA_PATH               =
-
-# If set to YES, the inheritance and collaboration graphs will hide inheritance
-# and usage relations if the target is undocumented or is not a class.
-# The default value is: YES.
-
-HIDE_UNDOC_RELATIONS   = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz (see:
-# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
-# Bell Labs. The other options in this section have no effect if this option is
-# set to NO.
-# The default value is: NO.
-
-HAVE_DOT               = NO
-
-# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
-# to run in parallel. When set to 0 doxygen will base this on the number of
-# processors available in the system. You can set it explicitly to a value
-# larger than 0 to get control over the balance between CPU load and processing
-# speed.
-# Minimum value: 0, maximum value: 32, default value: 0.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_NUM_THREADS        = 0
-
-# When you want a differently looking font in the dot files that doxygen
-# generates you can specify the font name using DOT_FONTNAME. You need to make
-# sure dot is able to find the font, which can be done by putting it in a
-# standard location or by setting the DOTFONTPATH environment variable or by
-# setting DOT_FONTPATH to the directory containing the font.
-# The default value is: Helvetica.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTNAME           = Helvetica
-
-# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
-# dot graphs.
-# Minimum value: 4, maximum value: 24, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTSIZE           = 10
-
-# By default doxygen will tell dot to use the default font as specified with
-# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
-# the path where dot can find it using this tag.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_FONTPATH           =
-
-# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
-# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CLASS_GRAPH            = YES
-
-# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
-# graph for each documented class showing the direct and indirect implementation
-# dependencies (inheritance, containment, and class references variables) of the
-# class with other documented classes.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-COLLABORATION_GRAPH    = YES
-
-# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
-# groups, showing the direct groups dependencies.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GROUP_GRAPHS           = YES
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-UML_LOOK               = NO
-
-# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
-# class node. If there are many fields or methods and many nodes the graph may
-# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
-# number of items for each type to make the size more manageable. Set this to 0
-# for no limit. Note that the threshold may be exceeded by 50% before the limit
-# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
-# but if the number exceeds 15, the total amount of fields shown is limited to
-# 10.
-# Minimum value: 0, maximum value: 100, default value: 10.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-UML_LIMIT_NUM_FIELDS   = 10
-
-# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
-# collaboration graphs will show the relations between templates and their
-# instances.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-TEMPLATE_RELATIONS     = NO
-
-# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
-# YES then doxygen will generate a graph for each documented file showing the
-# direct and indirect include dependencies of the file with other documented
-# files.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INCLUDE_GRAPH          = YES
-
-# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
-# set to YES then doxygen will generate a graph for each documented file showing
-# the direct and indirect include dependencies of the file with other documented
-# files.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INCLUDED_BY_GRAPH      = YES
-
-# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
-# dependency graph for every global function or class method.
-#
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable call graphs for selected
-# functions only using the \callgraph command.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CALL_GRAPH             = NO
-
-# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
-# dependency graph for every global function or class method.
-#
-# Note that enabling this option will significantly increase the time of a run.
-# So in most cases it will be better to enable caller graphs for selected
-# functions only using the \callergraph command.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-CALLER_GRAPH           = NO
-
-# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will generate a
-# graphical hierarchy of all classes instead of a textual one.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GRAPHICAL_HIERARCHY    = YES
-
-# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
-# dependencies a directory has on other directories in a graphical way. The
-# dependency relations are determined by the #include relations between the
-# files in the directories.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DIRECTORY_GRAPH        = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot.
-# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
-# to make the SVG files visible in IE 9+ (other browsers do not have this
-# requirement).
-# Possible values are: png, jpg, gif and svg.
-# The default value is: png.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_IMAGE_FORMAT       = png
-
-# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
-# enable generation of interactive SVG images that allow zooming and panning.
-#
-# Note that this requires a modern browser other than Internet Explorer. Tested
-# and working are Firefox, Chrome, Safari, and Opera.
-# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
-# the SVG files visible. Older versions of IE do not have SVG support.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-INTERACTIVE_SVG        = NO
-
-# The DOT_PATH tag can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_PATH               =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the \dotfile
-# command).
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOTFILE_DIRS           =
-
-# The MSCFILE_DIRS tag can be used to specify one or more directories that
-# contain msc files that are included in the documentation (see the \mscfile
-# command).
-
-MSCFILE_DIRS           =
-
-# The DIAFILE_DIRS tag can be used to specify one or more directories that
-# contain dia files that are included in the documentation (see the \diafile
-# command).
-
-DIAFILE_DIRS           =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
-# that will be shown in the graph. If the number of nodes in a graph becomes
-# larger than this value, doxygen will truncate the graph, which is visualized
-# by representing a node as a red box. Note that if the number of direct
-# children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
-# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-# Minimum value: 0, maximum value: 10000, default value: 50.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_GRAPH_MAX_NODES    = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
-# generated by dot. A depth value of 3 means that only nodes reachable from the
-# root by following a path via at most 3 edges will be shown. Nodes that lie
-# further from the root node will be omitted. Note that setting this option to 1
-# or 2 may greatly reduce the computation time needed for large code bases. Also
-# note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-# Minimum value: 0, maximum value: 1000, default value: 0.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-MAX_DOT_GRAPH_DEPTH    = 0
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is disabled by default, because dot on Windows does not seem
-# to support this out of the box.
-#
-# Warning: Depending on the platform used, enabling this option may lead to
-# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
-# read).
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_TRANSPARENT        = NO
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10) support
-# this, this feature is disabled by default.
-# The default value is: NO.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_MULTI_TARGETS      = YES
-
-# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
-# explaining the meaning of the various boxes and arrows in the dot generated
-# graphs.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-GENERATE_LEGEND        = YES
-
-# If the DOT_CLEANUP tag is set to YES doxygen will remove the intermediate dot
-# files that are used to generate the various graphs.
-# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
-
-DOT_CLEANUP            = YES
diff --git a/parallel-libs/streamexecutor/README.txt b/parallel-libs/streamexecutor/README.txt
deleted file mode 100644
index b8ec47d3d5a..00000000000
--- a/parallel-libs/streamexecutor/README.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-StreamExecutor
-==============
-
-StreamExecutor is a wrapper around CUDA and OpenCL (host-side) programming
-models (runtimes). This abstraction cleanly permits host code to target either
-CUDA or OpenCL devices with identically-functioning data parallel kernels. It
-manages the execution of concurrent work targeting the accelerator, similar to a
-host-side Executor.
-
-This version of StreamExecutor can be built either as a sub-project of the LLVM
-project or as a standalone project depending on LLVM as an external package.
diff --git a/parallel-libs/streamexecutor/customdoxygen.css b/parallel-libs/streamexecutor/customdoxygen.css
deleted file mode 100644
index a40ac950e3f..00000000000
--- a/parallel-libs/streamexecutor/customdoxygen.css
+++ /dev/null
@@ -1,20 +0,0 @@
-body {
-  background-color: #e0e0eb;
-}
-
-div.header {
-  margin-left: auto;
-  margin-right: auto;
-  max-width: 60em;
-  padding-left: 2em;
-  padding-right: 2em;
-}
-
-div.contents {
-  margin-left: auto;
-  margin-right: auto;
-  max-width: 60em;
-  background-color: white;
-  padding: 2em;
-  border-radius: 1em;
-}
diff --git a/parallel-libs/streamexecutor/examples/CMakeLists.txt b/parallel-libs/streamexecutor/examples/CMakeLists.txt
deleted file mode 100644
index cb061d5ca96..00000000000
--- a/parallel-libs/streamexecutor/examples/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-add_executable(cuda_saxpy_example CUDASaxpy.cpp)
-target_link_libraries(cuda_saxpy_example streamexecutor)
-
-add_executable(host_saxpy_example HostSaxpy.cpp)
-target_link_libraries(host_saxpy_example streamexecutor)
diff --git a/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp b/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp
deleted file mode 100644
index 6b2c59e5cd6..00000000000
--- a/parallel-libs/streamexecutor/examples/CUDASaxpy.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===-- CUDASaxpy.cpp - Example of CUDA saxpy with StreamExecutor API -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains example code demonstrating the usage of the
-/// StreamExecutor API. Snippets of this file will be included as code examples
-/// in documentation. Taking these examples from a real source file guarantees
-/// that the examples will always compile.
-///
-//===----------------------------------------------------------------------===//
-
-#include <algorithm>
-#include <cassert>
-#include <cstdlib>
-#include <vector>
-
-#include "streamexecutor/StreamExecutor.h"
-
-/// [Example saxpy compiler-generated]
-// Code in this namespace is generated by the compiler (e.g. clang).
-//
-// The name of this namespace may depend on the compiler that generated it, so
-// this is just an example name.
-namespace __compilergen {
-
-// Specialization of the streamexecutor::Kernel template class for the parameter
-// types of the saxpy(float A, float *X, float *Y) kernel.
-using SaxpyKernel =
-    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>,
-                           streamexecutor::GlobalDeviceMemory<float>>;
-
-// A string containing the PTX code generated by the device compiler for the
-// saxpy kernel. String contents not shown here.
-extern const char *SaxpyPTX;
-
-// A global instance of a loader spec that knows how to load the code in the
-// SaxpyPTX string.
-static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = []() {
-  streamexecutor::MultiKernelLoaderSpec Spec;
-  Spec.addCUDAPTXInMemory("saxpy", {{{2, 0}, SaxpyPTX}});
-  return Spec;
-}();
-
-} // namespace __compilergen
-/// [Example saxpy compiler-generated]
-
-/// [Example saxpy host PTX]
-// The PTX text for a saxpy kernel.
-const char *__compilergen::SaxpyPTX = R"(
-  .version 4.3
-  .target sm_20
-  .address_size 64
-
-  .visible .entry saxpy(.param .f32 A, .param .u64 X, .param .u64 Y) {
-    .reg .f32 %AValue;
-    .reg .f32 %XValue;
-    .reg .f32 %YValue;
-    .reg .f32 %Result;
-
-    .reg .b64 %XBaseAddrGeneric;
-    .reg .b64 %YBaseAddrGeneric;
-    .reg .b64 %XBaseAddrGlobal;
-    .reg .b64 %YBaseAddrGlobal;
-    .reg .b64 %XAddr;
-    .reg .b64 %YAddr;
-    .reg .b64 %ThreadByteOffset;
-
-    .reg .b32 %TID;
-
-    ld.param.f32 %AValue, [A];
-    ld.param.u64 %XBaseAddrGeneric, [X];
-    ld.param.u64 %YBaseAddrGeneric, [Y];
-    cvta.to.global.u64 %XBaseAddrGlobal, %XBaseAddrGeneric;
-    cvta.to.global.u64 %YBaseAddrGlobal, %YBaseAddrGeneric;
-    mov.u32 %TID, %tid.x;
-    mul.wide.u32 %ThreadByteOffset, %TID, 4;
-    add.s64 %XAddr, %ThreadByteOffset, %XBaseAddrGlobal;
-    add.s64 %YAddr, %ThreadByteOffset, %YBaseAddrGlobal;
-    ld.global.f32 %XValue, [%XAddr];
-    ld.global.f32 %YValue, [%YAddr];
-    fma.rn.f32 %Result, %AValue, %XValue, %YValue;
-    st.global.f32 [%XAddr], %Result;
-    ret;
-  }
-)";
-/// [Example saxpy host PTX]
-
-int main() {
-  /// [Example saxpy host main]
-  namespace se = ::streamexecutor;
-  namespace cg = ::__compilergen;
-
-  // Create some host data.
-  float A = 42.0f;
-  std::vector<float> HostX = {0, 1, 2, 3};
-  std::vector<float> HostY = {4, 5, 6, 7};
-  size_t ArraySize = HostX.size();
-
-  // Get a device object.
-  se::Platform *Platform =
-      getOrDie(se::PlatformManager::getPlatformByName("CUDA"));
-  if (Platform->getDeviceCount() == 0) {
-    return EXIT_FAILURE;
-  }
-  se::Device Device = getOrDie(Platform->getDevice(0));
-
-  // Load the kernel onto the device.
-  cg::SaxpyKernel Kernel =
-      getOrDie(Device.createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
-
-  se::RegisteredHostMemory<float> RegisteredX =
-      getOrDie(Device.registerHostMemory<float>(HostX));
-  se::RegisteredHostMemory<float> RegisteredY =
-      getOrDie(Device.registerHostMemory<float>(HostY));
-
-  // Allocate memory on the device.
-  se::GlobalDeviceMemory<float> X =
-      getOrDie(Device.allocateDeviceMemory<float>(ArraySize));
-  se::GlobalDeviceMemory<float> Y =
-      getOrDie(Device.allocateDeviceMemory<float>(ArraySize));
-
-  // Run operations on a stream.
-  se::Stream Stream = getOrDie(Device.createStream());
-  Stream.thenCopyH2D(RegisteredX, X)
-      .thenCopyH2D(RegisteredY, Y)
-      .thenLaunch(ArraySize, 1, Kernel, A, X, Y)
-      .thenCopyD2H(X, RegisteredX);
-  // Wait for the stream to complete.
-  se::dieIfError(Stream.blockHostUntilDone());
-
-  // Process output data in HostX.
-  std::vector<float> ExpectedX = {4, 47, 90, 133};
-  assert(std::equal(ExpectedX.begin(), ExpectedX.end(), HostX.begin()));
-  /// [Example saxpy host main]
-}
diff --git a/parallel-libs/streamexecutor/examples/HostSaxpy.cpp b/parallel-libs/streamexecutor/examples/HostSaxpy.cpp
deleted file mode 100644
index cf81b0ba915..00000000000
--- a/parallel-libs/streamexecutor/examples/HostSaxpy.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- HostSaxpy.cpp - Example of host saxpy with StreamExecutor API -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains example code demonstrating the usage of the
-/// StreamExecutor API for a host platform.
-///
-//===----------------------------------------------------------------------===//
-
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <vector>
-
-#include "streamexecutor/StreamExecutor.h"
-
-void Saxpy(float A, float *X, float *Y, size_t N) {
-  for (size_t I = 0; I < N; ++I)
-    X[I] = A * X[I] + Y[I];
-}
-
-namespace __compilergen {
-using SaxpyKernel =
-    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>,
-                           streamexecutor::GlobalDeviceMemory<float>, size_t>;
-
-// Wrapper function converts argument addresses to arguments.
-void SaxpyWrapper(const void *const *ArgumentAddresses) {
-  Saxpy(*static_cast<const float *>(ArgumentAddresses[0]),
-        *static_cast<float **>(const_cast<void *>(ArgumentAddresses[1])),
-        *static_cast<float **>(const_cast<void *>(ArgumentAddresses[2])),
-        *static_cast<const size_t *>(ArgumentAddresses[3]));
-}
-
-// The wrapper function is what gets registered.
-static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = []() {
-  streamexecutor::MultiKernelLoaderSpec Spec;
-  Spec.addHostFunction("Saxpy", SaxpyWrapper);
-  return Spec;
-}();
-} // namespace __compilergen
-
-int main() {
-  namespace se = ::streamexecutor;
-  namespace cg = ::__compilergen;
-
-  // Create some host data.
-  float A = 42.0f;
-  std::vector<float> HostX = {0, 1, 2, 3};
-  std::vector<float> HostY = {4, 5, 6, 7};
-  size_t ArraySize = HostX.size();
-
-  // Get a device object.
-  se::Platform *Platform =
-      getOrDie(se::PlatformManager::getPlatformByName("host"));
-  if (Platform->getDeviceCount() == 0) {
-    return EXIT_FAILURE;
-  }
-  se::Device Device = getOrDie(Platform->getDevice(0));
-
-  // Load the kernel onto the device.
-  cg::SaxpyKernel Kernel =
-      getOrDie(Device.createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
-
-  se::RegisteredHostMemory<float> RegisteredX =
-      getOrDie(Device.registerHostMemory<float>(HostX));
-  se::RegisteredHostMemory<float> RegisteredY =
-      getOrDie(Device.registerHostMemory<float>(HostY));
-
-  // Allocate memory on the device.
-  se::GlobalDeviceMemory<float> X =
-      getOrDie(Device.allocateDeviceMemory<float>(ArraySize));
-  se::GlobalDeviceMemory<float> Y =
-      getOrDie(Device.allocateDeviceMemory<float>(ArraySize));
-
-  // Run operations on a stream.
-  se::Stream Stream = getOrDie(Device.createStream());
-  Stream.thenCopyH2D(RegisteredX, X)
-      .thenCopyH2D(RegisteredY, Y)
-      .thenLaunch(1, 1, Kernel, A, X, Y, ArraySize)
-      .thenCopyD2H(X, RegisteredX);
-  // Wait for the stream to complete.
-  se::dieIfError(Stream.blockHostUntilDone());
-
-  // Process output data in HostX.
-  std::vector<float> ExpectedX = {4, 47, 90, 133};
-  assert(std::equal(ExpectedX.begin(), ExpectedX.end(), HostX.begin()));
-}
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Device.h b/parallel-libs/streamexecutor/include/streamexecutor/Device.h
deleted file mode 100644
index bf73655aea4..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/Device.h
+++ /dev/null
@@ -1,302 +0,0 @@
-//===-- Device.h - The Device class -----------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// The Device class which represents a single device of a specific platform.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_DEVICE_H
-#define STREAMEXECUTOR_DEVICE_H
-
-#include <type_traits>
-
-#include "streamexecutor/Error.h"
-#include "streamexecutor/HostMemory.h"
-#include "streamexecutor/KernelSpec.h"
-#include "streamexecutor/PlatformDevice.h"
-
-namespace streamexecutor {
-
-class Stream;
-
-/// A class representing a StreamExecutor device.
-///
-/// Device instances are basically just pointers to the underlying
-/// implementation, so they are small and can be passed around by value.
-class Device {
-public:
-  explicit Device(PlatformDevice *PDevice);
-  virtual ~Device();
-
-  /// Gets the name of this device.
-  std::string getName() const { return PDevice->getName(); }
-
-  /// Creates a kernel object for this device.
-  template <typename KernelT>
-  Expected<typename std::enable_if<std::is_base_of<KernelBase, KernelT>::value,
-                                   KernelT>::type>
-  createKernel(const MultiKernelLoaderSpec &Spec) {
-    Expected<const void *> MaybeKernelHandle = PDevice->createKernel(Spec);
-    if (!MaybeKernelHandle)
-      return MaybeKernelHandle.takeError();
-    return KernelT(PDevice, *MaybeKernelHandle, Spec.getKernelName());
-  }
-
-  /// Creates a stream object for this device.
-  Expected<Stream> createStream();
-
-  /// Allocates an array of ElementCount entries of type T in device memory.
-  template <typename T>
-  Expected<GlobalDeviceMemory<T>> allocateDeviceMemory(size_t ElementCount) {
-    Expected<void *> MaybeMemory =
-        PDevice->allocateDeviceMemory(ElementCount * sizeof(T));
-    if (!MaybeMemory)
-      return MaybeMemory.takeError();
-    return GlobalDeviceMemory<T>(this, *MaybeMemory, ElementCount);
-  }
-
-  /// Registers a previously allocated host array of type T for asynchronous
-  /// memory operations.
-  ///
-  /// Host memory registered by this function can be used for asynchronous
-  /// memory copies on streams. See Stream::thenCopyD2H and Stream::thenCopyH2D.
-  template <typename T>
-  Expected<RegisteredHostMemory<T>>
-  registerHostMemory(llvm::MutableArrayRef<T> Memory) {
-    if (Error E = PDevice->registerHostMemory(Memory.data(),
-                                              Memory.size() * sizeof(T)))
-      return std::move(E);
-    return RegisteredHostMemory<T>(this, Memory.data(), Memory.size());
-  }
-
-  /// \anchor DeviceHostSyncCopyGroup
-  /// \name Host-synchronous device memory copying functions
-  ///
-  /// These methods block the calling host thread while copying data to or from
-  /// device memory. On the device side, these methods do not block any ongoing
-  /// device calls.
-  ///
-  /// There are no restrictions on the host memory that is used as a source or
-  /// destination in these copy methods, so there is no need to register that
-  /// host memory with registerHostMemory.
-  ///
-  /// Each of these methods has a single template parameter, T, that specifies
-  /// the type of data being copied. The ElementCount arguments specify the
-  /// number of objects of type T to be copied.
-  ///
-  /// For ease of use, each of the methods is overloaded to take either a
-  /// GlobalDeviceMemorySlice or a GlobalDeviceMemory argument in the device
-  /// memory argument slots, and the GlobalDeviceMemory arguments are just
-  /// converted to GlobalDeviceMemorySlice arguments internally by using
-  /// GlobalDeviceMemory::asSlice.
-  ///
-  /// These methods perform bounds checking to make sure that the ElementCount
-  /// is not too large for the source or destination. For methods that do not
-  /// take an ElementCount argument, an error is returned if the source size
-  /// does not exactly match the destination size.
-  ///@{
-
-  template <typename T>
-  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
-                           llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
-    if (ElementCount > Src.getElementCount())
-      return make_error("copying too many elements, " +
-                        llvm::Twine(ElementCount) +
-                        ", from a device array of element count " +
-                        llvm::Twine(Src.getElementCount()));
-    if (ElementCount > Dst.size())
-      return make_error(
-          "copying too many elements, " + llvm::Twine(ElementCount) +
-          ", to a host array of element count " + llvm::Twine(Dst.size()));
-    return PDevice->synchronousCopyD2H(Src.getBaseMemory().getHandle(),
-                                       Src.getElementOffset() * sizeof(T),
-                                       Dst.data(), 0, ElementCount * sizeof(T));
-  }
-
-  template <typename T>
-  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src,
-                           llvm::MutableArrayRef<T> Dst) {
-    if (Src.getElementCount() != Dst.size())
-      return make_error(
-          "array size mismatch for D2H, device source has element count " +
-          llvm::Twine(Src.getElementCount()) +
-          " but host destination has element count " + llvm::Twine(Dst.size()));
-    return synchronousCopyD2H(Src, Dst, Src.getElementCount());
-  }
-
-  template <typename T>
-  Error synchronousCopyD2H(GlobalDeviceMemorySlice<T> Src, T *Dst,
-                           size_t ElementCount) {
-    return synchronousCopyD2H(Src, llvm::MutableArrayRef<T>(Dst, ElementCount),
-                              ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src,
-                           llvm::MutableArrayRef<T> Dst, size_t ElementCount) {
-    return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src,
-                           llvm::MutableArrayRef<T> Dst) {
-    return synchronousCopyD2H(Src.asSlice(), Dst);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2H(const GlobalDeviceMemory<T> &Src, T *Dst,
-                           size_t ElementCount) {
-    return synchronousCopyD2H(Src.asSlice(), Dst, ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
-                           GlobalDeviceMemorySlice<T> Dst,
-                           size_t ElementCount) {
-    if (ElementCount > Src.size())
-      return make_error(
-          "copying too many elements, " + llvm::Twine(ElementCount) +
-          ", from a host array of element count " + llvm::Twine(Src.size()));
-    if (ElementCount > Dst.getElementCount())
-      return make_error("copying too many elements, " +
-                        llvm::Twine(ElementCount) +
-                        ", to a device array of element count " +
-                        llvm::Twine(Dst.getElementCount()));
-    return PDevice->synchronousCopyH2D(
-        Src.data(), 0, Dst.getBaseMemory().getHandle(),
-        Dst.getElementOffset() * sizeof(T), ElementCount * sizeof(T));
-  }
-
-  template <typename T>
-  Error synchronousCopyH2D(llvm::ArrayRef<T> Src,
-                           GlobalDeviceMemorySlice<T> Dst) {
-    if (Src.size() != Dst.getElementCount())
-      return make_error(
-          "array size mismatch for H2D, host source has element count " +
-          llvm::Twine(Src.size()) +
-          " but device destination has element count " +
-          llvm::Twine(Dst.getElementCount()));
-    return synchronousCopyH2D(Src, Dst, Dst.getElementCount());
-  }
-
-  template <typename T>
-  Error synchronousCopyH2D(T *Src, GlobalDeviceMemorySlice<T> Dst,
-                           size_t ElementCount) {
-    return synchronousCopyH2D(llvm::ArrayRef<T>(Src, ElementCount), Dst,
-                              ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst,
-                           size_t ElementCount) {
-    return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyH2D(llvm::ArrayRef<T> Src, GlobalDeviceMemory<T> &Dst) {
-    return synchronousCopyH2D(Src, Dst.asSlice());
-  }
-
-  template <typename T>
-  Error synchronousCopyH2D(T *Src, GlobalDeviceMemory<T> &Dst,
-                           size_t ElementCount) {
-    return synchronousCopyH2D(Src, Dst.asSlice(), ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                           GlobalDeviceMemorySlice<T> Dst,
-                           size_t ElementCount) {
-    if (ElementCount > Src.getElementCount())
-      return make_error("copying too many elements, " +
-                        llvm::Twine(ElementCount) +
-                        ", from a device array of element count " +
-                        llvm::Twine(Src.getElementCount()));
-    if (ElementCount > Dst.getElementCount())
-      return make_error("copying too many elements, " +
-                        llvm::Twine(ElementCount) +
-                        ", to a device array of element count " +
-                        llvm::Twine(Dst.getElementCount()));
-    return PDevice->synchronousCopyD2D(
-        Src.getBaseMemory().getHandle(), Src.getElementOffset() * sizeof(T),
-        Dst.getBaseMemory().getHandle(), Dst.getElementOffset() * sizeof(T),
-        ElementCount * sizeof(T));
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                           GlobalDeviceMemorySlice<T> Dst) {
-    if (Src.getElementCount() != Dst.getElementCount())
-      return make_error(
-          "array size mismatch for D2D, device source has element count " +
-          llvm::Twine(Src.getElementCount()) +
-          " but device destination has element count " +
-          llvm::Twine(Dst.getElementCount()));
-    return synchronousCopyD2D(Src, Dst, Src.getElementCount());
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
-                           GlobalDeviceMemorySlice<T> Dst,
-                           size_t ElementCount) {
-    return synchronousCopyD2D(Src.asSlice(), Dst, ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
-                           GlobalDeviceMemorySlice<T> Dst) {
-    return synchronousCopyD2D(Src.asSlice(), Dst);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                           GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
-    return synchronousCopyD2D(Src, Dst.asSlice(), ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(GlobalDeviceMemorySlice<T> Src,
-                           GlobalDeviceMemory<T> &Dst) {
-    return synchronousCopyD2D(Src, Dst.asSlice());
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
-                           GlobalDeviceMemory<T> &Dst, size_t ElementCount) {
-    return synchronousCopyD2D(Src.asSlice(), Dst.asSlice(), ElementCount);
-  }
-
-  template <typename T>
-  Error synchronousCopyD2D(const GlobalDeviceMemory<T> &Src,
-                           GlobalDeviceMemory<T> &Dst) {
-    return synchronousCopyD2D(Src.asSlice(), Dst.asSlice());
-  }
-
-  ///@} End host-synchronous device memory copying functions
-
-private:
-  // Only a GlobalDeviceMemoryBase may free device memory.
-  friend GlobalDeviceMemoryBase;
-  Error freeDeviceMemory(const GlobalDeviceMemoryBase &Memory) {
-    return PDevice->freeDeviceMemory(Memory.getHandle());
-  }
-
-  // Only destroyRegisteredHostMemoryInternals may unregister host memory.
-  friend void internal::destroyRegisteredHostMemoryInternals(Device *, void *);
-  Error unregisterHostMemory(const void *Pointer) {
-    return PDevice->unregisterHostMemory(Pointer);
-  }
-
-  PlatformDevice *PDevice;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_DEVICE_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h b/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h
deleted file mode 100644
index 62f6e579933..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/DeviceMemory.h
+++ /dev/null
@@ -1,278 +0,0 @@
-//===-- DeviceMemory.h - Types representing device memory -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file defines types that represent device memory buffers. Two memory
-/// spaces are represented here: global and shared. Host code can have a handle
-/// to device global memory, and that handle can be used to copy data to and
-/// from the device. Host code cannot have a handle to device shared memory
-/// because that memory only exists during the execution of a kernel.
-///
-/// GlobalDeviceMemory<T> is a handle to an array of elements of type T in
-/// global device memory. It is similar to a pair of a std::unique_ptr<T> and an
-/// element count to tell how many elements of type T fit in the memory pointed
-/// to by that T*.
-///
-/// SharedDeviceMemory<T> is just the size in elements of an array of elements
-/// of type T in device shared memory. No resources are actually attached to
-/// this class, it is just like a memo to the device to allocate space in shared
-/// memory.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_DEVICEMEMORY_H
-#define STREAMEXECUTOR_DEVICEMEMORY_H
-
-#include <cassert>
-#include <cstddef>
-
-#include "streamexecutor/Error.h"
-
-namespace streamexecutor {
-
-class Device;
-
-template <typename ElemT> class GlobalDeviceMemory;
-
-/// Reference to a slice of device memory.
-///
-/// Contains a base memory handle, an element count offset into that base
-/// memory, and an element count for the size of the slice.
-template <typename ElemT> class GlobalDeviceMemorySlice {
-public:
-  using ElementTy = ElemT;
-
-  /// Creates a slice covering all of Memory.
-  ///
-  /// Intentionally implicit so GlobalDeviceMemory<T> can be passed to functions
-  /// expecting GlobalDeviceMemorySlice<T> arguments.
-  ///
-  /// The slice stores a reference to Memory rather than a copy, so Memory must
-  /// outlive the slice.
-  GlobalDeviceMemorySlice(const GlobalDeviceMemory<ElemT> &Memory)
-      : BaseMemory(Memory), ElementOffset(0),
-        ElementCount(Memory.getElementCount()) {}
-
-  /// Creates a slice of ElementCount elements that starts ElementOffset
-  /// elements into BaseMemory.
-  ///
-  /// Asserts that the requested range lies entirely within BaseMemory.
-  GlobalDeviceMemorySlice(const GlobalDeviceMemory<ElemT> &BaseMemory,
-                          size_t ElementOffset, size_t ElementCount)
-      : BaseMemory(BaseMemory), ElementOffset(ElementOffset),
-        ElementCount(ElementCount) {
-    // Compare against the remaining space instead of summing offset and count,
-    // so a huge argument cannot wrap around size_t and slip past the check.
-    assert(ElementOffset <= BaseMemory.getElementCount() &&
-           ElementCount <= BaseMemory.getElementCount() - ElementOffset &&
-           "slicing past the end of a GlobalDeviceMemory buffer");
-  }
-
-  /// Gets the GlobalDeviceMemory backing this slice.
-  const GlobalDeviceMemory<ElemT> &getBaseMemory() const { return BaseMemory; }
-
-  /// Gets the offset of this slice from the base memory.
-  ///
-  /// The offset is measured in elements, not bytes.
-  size_t getElementOffset() const { return ElementOffset; }
-
-  /// Gets the number of elements in this slice.
-  size_t getElementCount() const { return ElementCount; }
-
-  /// Returns the number of bytes that can fit in this slice.
-  size_t getByteCount() const { return ElementCount * sizeof(ElemT); }
-
-  /// Creates a slice of the memory with the first DropCount elements removed.
-  ///
-  /// This slice is left unchanged; the result is a new slice over the same
-  /// base memory.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  GlobalDeviceMemorySlice<ElemT> slice(size_t DropCount) const {
-    assert(DropCount <= ElementCount &&
-           "dropping more than the size of a slice");
-    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset + DropCount,
-                                          ElementCount - DropCount);
-  }
-
-  /// Creates a slice of the memory with the last DropCount elements removed.
-  ///
-  /// This slice is left unchanged; the result is a new slice over the same
-  /// base memory.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  GlobalDeviceMemorySlice<ElemT> drop_back(size_t DropCount) const {
-    assert(DropCount <= ElementCount &&
-           "dropping more than the size of a slice");
-    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset,
-                                          ElementCount - DropCount);
-  }
-
-  /// Creates a slice of the memory that chops off the first DropCount elements
-  /// and keeps the next TakeCount elements.
-  ///
-  /// This slice is left unchanged; the result is a new slice over the same
-  /// base memory.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  GlobalDeviceMemorySlice<ElemT> slice(size_t DropCount,
-                                       size_t TakeCount) const {
-    // Checked by subtraction so that DropCount + TakeCount cannot wrap around
-    // size_t and make an out-of-range request look valid.
-    assert(DropCount <= ElementCount &&
-           TakeCount <= ElementCount - DropCount &&
-           "sub-slice operation overruns slice bounds");
-    return GlobalDeviceMemorySlice<ElemT>(BaseMemory, ElementOffset + DropCount,
-                                          TakeCount);
-  }
-
-private:
-  // Held by reference: the slice does not own or extend the lifetime of the
-  // memory it views.
-  const GlobalDeviceMemory<ElemT> &BaseMemory;
-  size_t ElementOffset; // Start of the slice, in elements from the base.
-  size_t ElementCount;  // Length of the slice, in elements.
-};
-
-/// Wrapper around a generic global device memory allocation.
-///
-/// This class represents a buffer of untyped bytes in the global memory space
-/// of a device. See GlobalDeviceMemory<T> for the corresponding type that
-/// includes type information for the elements in its buffer.
-///
-/// This is effectively a pair consisting of an opaque handle and a buffer size
-/// in bytes. The opaque handle is a platform-dependent handle to the actual
-/// memory that is allocated on the device.
-///
-/// In some cases, such as in the CUDA platform, the opaque handle may actually
-/// be a pointer in the virtual address space and it may be valid to perform
-/// arithmetic on it to obtain other device pointers, but this is not the case
-/// in general.
-///
-/// For example, in the OpenCL platform, the handle is a pointer to a _cl_mem
-/// handle object which really is completely opaque to the user.
-class GlobalDeviceMemoryBase {
-public:
-  /// Returns an opaque handle to the underlying memory.
-  const void *getHandle() const { return Handle; }
-
-  /// Returns the address of the opaque handle as stored by this object.
-  ///
-  /// The returned pointer refers to a member of this object, so it is only
-  /// usable while this object is alive.
-  const void *const *getHandleAddress() const { return &Handle; }
-
-  // Cannot copy because the handle must be owned by a single object.
-  GlobalDeviceMemoryBase(const GlobalDeviceMemoryBase &) = delete;
-  GlobalDeviceMemoryBase &operator=(const GlobalDeviceMemoryBase &) = delete;
-
-protected:
-  /// Creates a GlobalDeviceMemoryBase from a handle and a byte count.
-  ///
-  /// D is stored as the device on which the memory lives.
-  GlobalDeviceMemoryBase(Device *D, const void *Handle, size_t ByteCount)
-      : TheDevice(D), Handle(Handle), ByteCount(ByteCount) {}
-
-  /// Transfer ownership of the underlying handle.
-  ///
-  /// Other is left with a null device, a null handle and a byte count of zero.
-  GlobalDeviceMemoryBase(GlobalDeviceMemoryBase &&Other) noexcept
-      : TheDevice(Other.TheDevice), Handle(Other.Handle),
-        ByteCount(Other.ByteCount) {
-    Other.TheDevice = nullptr;
-    Other.Handle = nullptr;
-    Other.ByteCount = 0;
-  }
-
-  /// Takes over Other's device, handle and byte count and resets Other to the
-  /// same empty state the move constructor leaves behind.
-  ///
-  /// NOTE(review): the handle previously held by this object is overwritten
-  /// without being released here, and self-assignment would null out this
-  /// object's own fields. Whether that leaks device memory depends on the
-  /// out-of-line destructor -- confirm.
-  GlobalDeviceMemoryBase &operator=(GlobalDeviceMemoryBase &&Other) noexcept {
-    TheDevice = Other.TheDevice;
-    Handle = Other.Handle;
-    ByteCount = Other.ByteCount;
-    Other.TheDevice = nullptr;
-    Other.Handle = nullptr;
-    Other.ByteCount = 0;
-    return *this;
-  }
-
-  // Declared here and defined out of line. Protected and non-virtual, so
-  // objects cannot be destroyed through a GlobalDeviceMemoryBase pointer from
-  // outside the class hierarchy.
-  ~GlobalDeviceMemoryBase();
-
-  Device *TheDevice;  // Pointer to the device on which this memory lives.
-  const void *Handle; // Platform-dependent value representing allocated memory.
-  size_t ByteCount;   // Size in bytes of this allocation.
-};
-
-/// Typed wrapper around the "void *"-like GlobalDeviceMemoryBase class.
-///
-/// For example, GlobalDeviceMemory<int> is a simple wrapper around
-/// GlobalDeviceMemoryBase that represents a buffer of integers stored in global
-/// device memory.
-template <typename ElemT>
-class GlobalDeviceMemory : public GlobalDeviceMemoryBase {
-public:
-  using ElementTy = ElemT;
-
-  // Move operations are declared here and defined as defaulted after the
-  // class, so they use the base class move operations.
-  GlobalDeviceMemory(GlobalDeviceMemory &&) noexcept;
-  GlobalDeviceMemory &operator=(GlobalDeviceMemory &&) noexcept;
-
-  /// Returns the number of elements of type ElemT that constitute this
-  /// allocation.
-  ///
-  /// Computed from the stored byte count by integer division.
-  size_t getElementCount() const { return ByteCount / sizeof(ElemT); }
-
-  /// Returns the number of bytes that can fit in this memory buffer.
-  size_t getByteCount() const { return ByteCount; }
-
-  /// Converts this memory object into a slice.
-  ///
-  /// The slice covers the whole buffer and refers back to this object.
-  GlobalDeviceMemorySlice<ElemT> asSlice() const {
-    return GlobalDeviceMemorySlice<ElemT>(*this);
-  }
-
-private:
-  // Copying is disabled, matching the base class.
-  GlobalDeviceMemory(const GlobalDeviceMemory &) = delete;
-  GlobalDeviceMemory &operator=(const GlobalDeviceMemory &) = delete;
-
-  // Only a Device can create a GlobalDeviceMemory instance.
-  friend Device;
-  // Takes a size in elements and stores it in the base class as a size in
-  // bytes.
-  GlobalDeviceMemory(Device *D, const void *Handle, size_t ElementCount)
-      : GlobalDeviceMemoryBase(D, Handle, ElementCount * sizeof(ElemT)) {}
-};
-
-template <typename ElemT>
-GlobalDeviceMemory<ElemT>::GlobalDeviceMemory(
-    GlobalDeviceMemory<ElemT> &&) noexcept = default;
-
-template <typename ElemT>
-GlobalDeviceMemory<ElemT> &GlobalDeviceMemory<ElemT>::
-operator=(GlobalDeviceMemory<ElemT> &&) noexcept = default;
-
-/// A class to represent the size of a dynamic shared memory buffer of elements
-/// of type T on a device.
-///
-/// Shared memory buffers exist only on the device and cannot be manipulated
-/// from the host, so instances of this class do not have an opaque handle, only
-/// a size.
-///
-/// This type of memory is called "local" memory in OpenCL and "shared" memory
-/// in CUDA, and both platforms follow the rule that the host code only knows
-/// the size of these buffers and does not have a handle to them.
-///
-/// The treatment of shared memory in StreamExecutor matches the way it is done
-/// in OpenCL, where a kernel takes any number of shared memory sizes as kernel
-/// function arguments.
-///
-/// In CUDA only one shared memory size argument is allowed per kernel call.
-/// StreamExecutor handles this by allowing CUDA kernel signatures that take
-/// multiple SharedDeviceMemory arguments, and simply adding together all the
-/// shared memory sizes to get the final shared memory size that is used to
-/// launch the kernel.
-template <typename ElemT> class SharedDeviceMemory {
-public:
-  /// Creates a typed area of shared device memory with a given number of
-  /// elements.
-  ///
-  /// This is the only public way to construct an instance from a size.
-  static SharedDeviceMemory<ElemT> makeFromElementCount(size_t ElementCount) {
-    return SharedDeviceMemory(ElementCount);
-  }
-
-  /// Copyable because it is just an array size.
-  SharedDeviceMemory(const SharedDeviceMemory &) = default;
-
-  /// Copy-assignable because it is just an array size.
-  SharedDeviceMemory &operator=(const SharedDeviceMemory &) = default;
-
-  /// Returns the number of elements of type ElemT that can fit in this memory
-  /// buffer.
-  size_t getElementCount() const { return ElementCount; }
-
-  /// Returns the number of bytes that can fit in this memory buffer.
-  size_t getByteCount() const { return ElementCount * sizeof(ElemT); }
-
-  /// Returns whether this is a single-element memory buffer.
-  bool isScalar() const { return getElementCount() == 1; }
-
-private:
-  /// Constructs a SharedDeviceMemory instance from an element count.
-  ///
-  /// This constructor is not public because there is a potential for confusion
-  /// between the size of the buffer in bytes and the size of the buffer in
-  /// elements.
-  ///
-  /// The static method makeFromElementCount is provided for users of this class
-  /// because its name makes the meaning of the size parameter clear.
-  explicit SharedDeviceMemory(size_t ElementCount)
-      : ElementCount(ElementCount) {}
-
-  // Size of the buffer in elements of type ElemT; the only state this class
-  // holds.
-  size_t ElementCount;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_DEVICEMEMORY_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Error.h b/parallel-libs/streamexecutor/include/streamexecutor/Error.h
deleted file mode 100644
index d33a5a6a79a..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/Error.h
+++ /dev/null
@@ -1,215 +0,0 @@
-//===-- Error.h - Error handling --------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Error types used in the public API and internally in StreamExecutor.
-///
-/// StreamExecutor's error handling is based on the types streamexecutor::Error
-/// and streamexecutor::Expected<T>.
-///
-///
-/// \section error The Error Class
-///
-/// The Error class either represents success or contains an error message
-/// describing the cause of the error. Error instances are created by calling
-/// Error::success for successes or make_error for errors.
-///
-/// \code{.cpp}
-/// Error achieveWorldPeace() {
-///   if (WorldPeaceAlreadyAchieved) {
-///     return Error::success();
-///   } else {
-///     return make_error("Can't someone else do it?");
-///   }
-/// }
-/// \endcode
-///
-/// Error instances are implicitly convertible to bool. Error values convert to
-/// true and successes convert to false. Error instances must have their boolean
-/// values checked or they must be moved before they go out of scope, otherwise
-/// their destruction will cause the program to abort with a warning about an
-/// unchecked Error.
-///
-/// If the Error represents success, then checking the boolean value is all that
-/// is required, but if the Error represents a real error, the Error value must
-/// be consumed. The function consumeAndGetMessage is the way to extract the
-/// error message from an Error and consume the Error at the same time, so
-/// typical error handling will first check whether there was an error and then
-/// extract the error message if so. Here is an example:
-///
-/// \code{.cpp}
-/// if (Error E = achieveWorldPeace()) {
-///   printf("An error occurred: %s\n", consumeAndGetMessage(E).c_str());
-///   exit(EXIT_FAILURE);
-/// }
-/// \endcode
-///
-/// It is also common to simply pass an error along up the call stack if it
-/// cannot be handled in the current function.
-///
-/// \code{.cpp}
-/// Error doTask() {
-///   if (Error E = achieveWorldPeace()) {
-///     return E;
-///   }
-///   ...
-/// }
-/// \endcode
-///
-/// There is also a function consumeError that consumes an error value without
-/// fetching the error message. This is useful when we want to ignore an error.
-///
-/// The dieIfError function is also provided for quick-and-dirty error handling.
-///
-///
-/// \section expected The Expected Class
-///
-/// The Expected<T> class either represents a value of type T or an Error.
-/// Expected<T> has one constructor that takes a T value and another constructor
-/// that takes an Error rvalue reference, so Expected instances can be
-/// constructed either from values or from errors:
-///
-/// \code{.cpp}
-/// Expected<int> getMyFavoriteInt() {
-///   int MyFavorite = 42;
-///   if (IsThereAFavorite) {
-///     return MyFavorite;
-///   } else {
-///     return make_error("I don't have a favorite");
-///   }
-/// }
-/// \endcode
-///
-/// Expected<T> instances are implicitly convertible to bool and are true if
-/// they contain a value and false if they contain an error. Note that this is
-/// the opposite convention of the Error type conversion to bool, where true
-/// meant error and false meant success.
-///
-/// If the Expected<T> instance is not an error, the stored value can be
-/// obtained by using operator*. If access to members of the value is desired
-/// instead of the value itself, operator-> can be used as well.
-///
-/// Expected<T> instances must have their boolean value checked or they must be
-/// moved before they go out of scope, otherwise they will cause the program to
-/// abort with a warning about an unchecked error. If the Expected<T> instance
-/// contains a value, then checking the boolean value is all that is required,
-/// but if it contains an Error object, that Error object must be handled by
-/// calling Expected<T>::takeError() to get the underlying error.
-///
-/// Here is an example of the use of an Expected<T> value returned from a
-/// function:
-///
-/// \code{.cpp}
-/// Expected<int> ExpectedInt = getMyFavoriteInt();
-/// if (ExpectedInt) {
-///   printf("My favorite integer is %d\n", *ExpectedInt);
-/// } else {
-///   printf("An error occurred: %s\n",
-///     consumeAndGetMessage(ExpectedInt.takeError()).c_str());
-///   exit(EXIT_FAILURE);
-/// }
-/// \endcode
-///
-/// The following snippet shows some examples of how Errors and Expected values
-/// can be passed up the stack if they should not be handled in the current
-/// function.
-///
-/// \code{.cpp}
-/// Expected<double> doTask3() {
-///   Error WorldPeaceError = achieveWorldPeace();
-///   if (WorldPeaceError) {
-///     return std::move(WorldPeaceError);
-///   }
-///
-///   Expected<martian> ExpectedMartian = getMyFavoriteMartian();
-///   if (!ExpectedMartian) {
-///     // Must extract the error because martian cannot be converted to double.
-///     return ExpectedMartian.takeError();
-///   }
-///
-///   // It's fine to return Expected<int> for Expected<double> because int can
-///   // be converted to double.
-///   return getMyFavoriteInt();
-/// }
-/// \endcode
-///
-/// The getOrDie function is also available for quick-and-dirty error handling.
-///
-///
-/// \section llvm Relation to llvm::Error and llvm::Expected
-///
-/// The streamexecutor::Error and streamexecutor::Expected classes are actually
-/// just their LLVM counterparts redeclared in the streamexecutor namespace, but
-/// they should be treated as separate types, even so.
-///
-/// StreamExecutor does not support any underlying llvm::ErrorInfo class except
-/// the one it defines internally for itself, so a streamexecutor::Error can be
-/// thought of as a restricted llvm::Error that is guaranteed to hold a specific
-/// error type.
-///
-/// Although code may compile if llvm functions are used to handle these
-/// StreamExecutor error types, it is likely that such code will lead to runtime
-/// errors, so it is strongly recommended that only the functions from the
-/// streamexecutor namespace are used on these StreamExecutor error types.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_ERROR_H
-#define STREAMEXECUTOR_ERROR_H
-
-#include <cstdio>
-#include <cstdlib>
-#include <memory>
-#include <string>
-
-#include "llvm/Support/Error.h"
-
-namespace streamexecutor {
-
-using llvm::consumeError;
-using llvm::Error;
-using llvm::Expected;
-using llvm::Twine;
-
-/// Makes an Error object from an error message.
-Error make_error(const Twine &Message);
-
-/// Consumes the input error and returns its error message.
-///
-/// Assumes the input was created by the make_error function above.
-std::string consumeAndGetMessage(Error &&E);
-
-/// Extracts the T value from an Expected<T> or prints an error message to
-/// stderr and exits the program with code EXIT_FAILURE if the Expected<T> is an
-/// error.
-///
-/// This function and the dieIfError function are provided for applications that
-/// are OK with aborting the program if an error occurs, and which don't have
-/// any special error logging needs. Applications with different error handling
-/// needs will likely want to declare their own functions with similar
-/// signatures but which log error messages in a different way or attempt to
-/// recover from errors instead of aborting the program.
-template <typename T> T getOrDie(Expected<T> &&E) {
-  // An Expected<T> converts to false when it holds an error instead of a value.
-  if (!E) {
-    // takeError() extracts the error, and consumeAndGetMessage consumes it
-    // while producing the text to print.
-    std::fprintf(stderr, "Error extracting an expected value: %s.\n",
-                 consumeAndGetMessage(E.takeError()).c_str());
-    std::exit(EXIT_FAILURE);
-  }
-  // Move the stored value out of E rather than copying it.
-  return std::move(*E);
-}
-
-/// Prints an error message to stderr and exits the program with code
-/// EXIT_FAILURE if the input is an error.
-///
-/// \sa getOrDie
-void dieIfError(Error &&E);
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_ERROR_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/HostMemory.h b/parallel-libs/streamexecutor/include/streamexecutor/HostMemory.h
deleted file mode 100644
index 18ff184ba68..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/HostMemory.h
+++ /dev/null
@@ -1,195 +0,0 @@
-//===-- HostMemory.h - Types for registered host memory ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-///
-/// This file defines types that represent registered host memory buffers. Host
-/// memory must be registered to participate in asynchronous copies to or from
-/// device memory.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_HOSTMEMORY_H
-#define STREAMEXECUTOR_HOSTMEMORY_H
-
-#include <cassert>
-#include <cstddef>
-#include <type_traits>
-
-#include "llvm/ADT/ArrayRef.h"
-
-namespace streamexecutor {
-
-class Device;
-template <typename ElemT> class RegisteredHostMemory;
-
-/// A mutable slice of registered host memory.
-///
-/// The memory is registered in the sense of
-/// streamexecutor::Device::registerHostMemory.
-///
-/// Holds a reference to an underlying registered host memory buffer. Must not
-/// be used after the underlying buffer is freed or unregistered.
-template <typename ElemT> class MutableRegisteredHostMemorySlice {
-public:
-  using ElementTy = ElemT;
-
-  /// Creates a slice covering the entire contents of Registered.
-  ///
-  /// Not marked explicit, so a RegisteredHostMemory lvalue converts to a
-  /// mutable slice implicitly.
-  MutableRegisteredHostMemorySlice(RegisteredHostMemory<ElemT> &Registered)
-      : MutableArrayRef(Registered.getPointer(), Registered.getElementCount()) {
-  }
-
-  /// Returns a pointer to the first element of the slice.
-  ElemT *getPointer() const { return MutableArrayRef.data(); }
-  /// Returns the number of elements in the slice.
-  size_t getElementCount() const { return MutableArrayRef.size(); }
-
-  /// Returns a new slice without the first DropCount elements of this slice.
-  ///
-  /// This slice itself is not modified.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  MutableRegisteredHostMemorySlice slice(size_t DropCount) const {
-    return MutableRegisteredHostMemorySlice(MutableArrayRef.slice(DropCount));
-  }
-
-  /// Returns a new slice that skips the first DropCount elements of this slice
-  /// and keeps the next TakeCount elements.
-  ///
-  /// This slice itself is not modified.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  MutableRegisteredHostMemorySlice slice(size_t DropCount,
-                                         size_t TakeCount) const {
-    return MutableRegisteredHostMemorySlice(
-        MutableArrayRef.slice(DropCount, TakeCount));
-  }
-
-  /// Returns a new slice without the last DropCount elements of this slice.
-  ///
-  /// This slice itself is not modified.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  MutableRegisteredHostMemorySlice drop_back(size_t DropCount) const {
-    return MutableRegisteredHostMemorySlice(
-        MutableArrayRef.drop_back(DropCount));
-  }
-
-private:
-  // Used by the slicing methods above to wrap an already-narrowed array
-  // reference.
-  MutableRegisteredHostMemorySlice(llvm::MutableArrayRef<ElemT> MutableArrayRef)
-      : MutableArrayRef(MutableArrayRef) {}
-
-  // Pointer-and-length view of the host buffer; the member deliberately shares
-  // its name with the llvm type.
-  llvm::MutableArrayRef<ElemT> MutableArrayRef;
-};
-
-/// An immutable slice of registered host memory.
-///
-/// The memory is registered in the sense of
-/// streamexecutor::Device::registerHostMemory.
-///
-/// Holds a reference to an underlying registered host memory buffer. Must not
-/// be used after the underlying buffer is freed or unregistered.
-template <typename ElemT> class RegisteredHostMemorySlice {
-public:
-  using ElementTy = ElemT;
-
-  /// Creates a slice covering the entire contents of Registered.
-  ///
-  /// Not marked explicit, so a RegisteredHostMemory converts to an immutable
-  /// slice implicitly.
-  RegisteredHostMemorySlice(const RegisteredHostMemory<ElemT> &Registered)
-      : ArrayRef(Registered.getPointer(), Registered.getElementCount()) {}
-
-  /// Creates an immutable slice over the same elements as MutableSlice.
-  ///
-  /// Not marked explicit, so a mutable slice converts to an immutable one
-  /// implicitly.
-  RegisteredHostMemorySlice(
-      MutableRegisteredHostMemorySlice<ElemT> MutableSlice)
-      : ArrayRef(MutableSlice.getPointer(), MutableSlice.getElementCount()) {}
-
-  /// Returns a pointer to the first element of the slice.
-  const ElemT *getPointer() const { return ArrayRef.data(); }
-  /// Returns the number of elements in the slice.
-  size_t getElementCount() const { return ArrayRef.size(); }
-
-  /// Returns a new slice without the first N elements of this slice.
-  ///
-  /// This slice itself is not modified.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  RegisteredHostMemorySlice slice(size_t N) const {
-    return RegisteredHostMemorySlice(ArrayRef.slice(N));
-  }
-
-  /// Returns a new slice that skips the first N elements of this slice and
-  /// keeps the next M elements.
-  ///
-  /// This slice itself is not modified.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  RegisteredHostMemorySlice slice(size_t N, size_t M) const {
-    return RegisteredHostMemorySlice(ArrayRef.slice(N, M));
-  }
-
-  /// Returns a new slice without the last N elements of this slice.
-  ///
-  /// This slice itself is not modified.
-  LLVM_ATTRIBUTE_UNUSED_RESULT
-  RegisteredHostMemorySlice drop_back(size_t N) const {
-    return RegisteredHostMemorySlice(ArrayRef.drop_back(N));
-  }
-
-private:
-  // Pointer-and-length view of the host buffer; the member deliberately shares
-  // its name with the llvm type.
-  llvm::ArrayRef<ElemT> ArrayRef;
-};
-
-namespace internal {
-
-/// Helper function to unregister host memory.
-///
-/// This is a thin wrapper around streamexecutor::Device::unregisterHostMemory.
-/// It is defined so this operation can be performed from the destructor of the
-/// template class RegisteredHostMemory without including Device.h in this
-/// header and creating a header inclusion cycle.
-void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer);
-
-} // namespace internal
-
-/// Registered host memory that knows how to unregister itself upon destruction.
-///
-/// The memory is registered in the sense of
-/// streamexecutor::Device::registerHostMemory.
-///
-/// ElemT is the type of element stored in the host buffer.
-template <typename ElemT> class RegisteredHostMemory {
-public:
-  using ElementTy = ElemT;
-
-  /// Wraps the ElementCount elements starting at Pointer, associated with
-  /// TheDevice.
-  ///
-  /// TheDevice must not be null. On destruction the buffer is handed to
-  /// internal::destroyRegisteredHostMemoryInternals.
-  RegisteredHostMemory(Device *TheDevice, ElemT *Pointer, size_t ElementCount)
-      : TheDevice(TheDevice), Pointer(Pointer), ElementCount(ElementCount) {
-    assert(TheDevice != nullptr && "cannot construct a "
-                                   "RegisteredHostMemoryBase with a null "
-                                   "platform device");
-  }
-
-  // Not copyable: exactly one object is responsible for releasing the buffer.
-  RegisteredHostMemory(const RegisteredHostMemory &) = delete;
-  RegisteredHostMemory &operator=(const RegisteredHostMemory &) = delete;
-
-  /// Takes over the buffer held by Other and leaves Other empty (null device,
-  /// null pointer, zero elements).
-  RegisteredHostMemory(RegisteredHostMemory &&Other) noexcept
-      : TheDevice(Other.TheDevice), Pointer(Other.Pointer),
-        ElementCount(Other.ElementCount) {
-    Other.TheDevice = nullptr;
-    Other.Pointer = nullptr;
-    Other.ElementCount = 0;
-  }
-
-  /// Releases the buffer currently held, if any, then takes over the buffer
-  /// held by Other and leaves Other empty.
-  ///
-  /// Self-assignment is a no-op.
-  RegisteredHostMemory &operator=(RegisteredHostMemory &&Other) noexcept {
-    if (this != &Other) {
-      // Hand the current buffer to the same helper the destructor uses before
-      // overwriting the fields; otherwise it would never be released.
-      if (Pointer != nullptr) {
-        internal::destroyRegisteredHostMemoryInternals(TheDevice, Pointer);
-      }
-      TheDevice = Other.TheDevice;
-      Pointer = Other.Pointer;
-      ElementCount = Other.ElementCount;
-      Other.TheDevice = nullptr;
-      Other.Pointer = nullptr;
-      Other.ElementCount = 0;
-    }
-    // The function is declared to return a reference; falling off the end
-    // without returning would be undefined behavior.
-    return *this;
-  }
-
-  /// Hands the device and pointer to the unregistration helper.
-  ///
-  /// A moved-from object passes null values here.
-  ~RegisteredHostMemory() {
-    internal::destroyRegisteredHostMemoryInternals(TheDevice, Pointer);
-  }
-
-  /// Returns the start of the buffer as a typed pointer.
-  ElemT *getPointer() { return static_cast<ElemT *>(Pointer); }
-  /// Returns the start of the buffer as a typed pointer to const.
-  const ElemT *getPointer() const { return static_cast<ElemT *>(Pointer); }
-  /// Returns the number of elements of type ElemT in the buffer.
-  size_t getElementCount() const { return ElementCount; }
-
-  /// Creates an immutable slice for the entire contents of this memory.
-  RegisteredHostMemorySlice<ElemT> asSlice() const {
-    return RegisteredHostMemorySlice<ElemT>(*this);
-  }
-
-  /// Creates a mutable slice for the entire contents of this memory.
-  MutableRegisteredHostMemorySlice<ElemT> asSlice() {
-    return MutableRegisteredHostMemorySlice<ElemT>(*this);
-  }
-
-private:
-  Device *TheDevice;   // Device passed to the unregistration helper.
-  void *Pointer;       // Start of the host buffer, stored untyped.
-  size_t ElementCount; // Size of the buffer in elements of type ElemT.
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_HOSTMEMORY_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h b/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h
deleted file mode 100644
index eb023816428..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/Kernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-//===-- Kernel.h - StreamExecutor kernel types ------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Types to represent device kernels (code compiled to run on GPU or other
-/// accelerator).
-///
-/// See the \ref index "main page" for an example of how a compiler-generated
-/// specialization of the Kernel class template can be used along with the
-/// streamexecutor::Stream::thenLaunch method to create a typesafe interface for
-/// kernel launches.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_KERNEL_H
-#define STREAMEXECUTOR_KERNEL_H
-
-#include "streamexecutor/Error.h"
-#include "streamexecutor/KernelSpec.h"
-
-#include <memory>
-
-namespace streamexecutor {
-
-class PlatformDevice;
-
-/// The base class for all kernel types.
-///
-/// Stores the name of the kernel in both mangled and demangled forms.
-class KernelBase {
-public:
-  /// Declared here and defined out of line.
-  ///
-  /// NOTE(review): Name is presumably the mangled kernel name, with the
-  /// demangled form derived from it in the definition -- confirm.
-  KernelBase(PlatformDevice *D, const void *PlatformKernelHandle,
-             llvm::StringRef Name);
-
-  // Not copyable.
-  KernelBase(const KernelBase &Other) = delete;
-  KernelBase &operator=(const KernelBase &Other) = delete;
-
-  // Movable; the move operations and the destructor are defined out of line.
-  KernelBase(KernelBase &&Other) noexcept;
-  KernelBase &operator=(KernelBase &&Other) noexcept;
-
-  ~KernelBase();
-
-  /// Returns the stored platform kernel handle.
-  const void *getPlatformHandle() const { return PlatformKernelHandle; }
-  /// Returns the stored kernel name.
-  const std::string &getName() const { return Name; }
-  /// Returns the stored demangled kernel name.
-  const std::string &getDemangledName() const { return DemangledName; }
-
-private:
-  PlatformDevice *PDevice;          // Platform device given at construction.
-  const void *PlatformKernelHandle; // Opaque handle given at construction.
-
-  std::string Name;
-  std::string DemangledName;
-};
-
-/// A StreamExecutor kernel.
-///
-/// The template parameters are the types of the parameters to the kernel
-/// function.
-template <typename... ParameterTs> class Kernel : public KernelBase {
-public:
-  /// Forwards all arguments to the KernelBase constructor; this class adds no
-  /// data members of its own.
-  Kernel(PlatformDevice *D, const void *PlatformKernelHandle,
-         llvm::StringRef Name)
-      : KernelBase(D, PlatformKernelHandle, Name) {}
-
-  // Move operations are declared here and defined as defaulted after the
-  // class, so they use the KernelBase move operations.
-  Kernel(Kernel &&Other) noexcept;
-  Kernel &operator=(Kernel &&Other) noexcept;
-};
-
-template <typename... ParameterTs>
-Kernel<ParameterTs...>::Kernel(Kernel<ParameterTs...> &&) noexcept = default;
-
-template <typename... ParameterTs>
-Kernel<ParameterTs...> &Kernel<ParameterTs...>::
-operator=(Kernel<ParameterTs...> &&) noexcept = default;
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_KERNEL_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h b/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
deleted file mode 100644
index a6a293001ec..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/KernelSpec.h
+++ /dev/null
@@ -1,287 +0,0 @@
-//===-- KernelSpec.h - Kernel loader spec types -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// KernelLoaderSpec is the base class for types that know where to find the
-/// code for a data-parallel kernel in a particular format on a particular
-/// platform. So, for example, there will be one subclass that deals with CUDA
-/// PTX code, another subclass that deals with CUDA fatbin code, and yet another
-/// subclass that deals with OpenCL text code.
-///
-/// A MultiKernelLoaderSpec is basically a collection of KernelLoaderSpec
-/// instances. This is useful when code is available for the same kernel in
-/// several different formats or targeted for several different platforms. All
-/// the various KernelLoaderSpec instances for this kernel can be combined
-/// together in one MultiKernelLoaderSpec and the specific platform consumer can
-/// decide which instance of the code it wants to use.
-///
-/// MultiKernelLoaderSpec provides several helper functions to build and
-/// register KernelLoaderSpec instances all in a single operation. For example,
-/// MultiKernelLoaderSpec::addCUDAPTXInMemory can be used to construct and
-/// register a CUDAPTXInMemorySpec KernelLoaderSpec.
-///
-/// The loader spec classes declared here are designed primarily to be
-/// instantiated by the compiler, but they can also be instantiated directly by
-/// the user. A simplified example workflow which a compiler might follow in the
-/// case of a CUDA kernel that is compiled to CUDA fatbin code is as follows:
-///
-/// 1. The user defines a kernel function called \c UserKernel.
-/// 2. The compiler compiles the kernel code into CUDA fatbin data and embeds
-///    that data into the host code at address \c __UserKernelFatbinAddress.
-/// 3. The compiler adds code at the beginning of the host code to instantiate a
-///    MultiKernelLoaderSpec:
-///    \code
-///    namespace compiler_cuda_namespace {
-///      MultiKernelLoaderSpec UserKernelLoaderSpec;
-///    } // namespace compiler_cuda_namespace
-///    \endcode
-/// 4. The compiler then adds code to the host code to add the fatbin data to
-///    the new MultiKernelLoaderSpec, and to associate that data with the kernel
-///    name \c "UserKernel":
-///    \code
-///    namespace compiler_cuda_namespace {
-///      UserKernelLoaderSpec.addCUDAFatbinInMemory(
-///        __UserKernelFatbinAddress, "UserKernel");
-///    } // namespace compiler_cuda_namespace
-///    \endcode
-/// 5. The host code, knowing in advance that the compiler will initialize a
-///    MultiKernelLoaderSpec based on the CUDA kernel's name, uses the symbol
-///    \c compiler_cuda_namespace::UserKernelLoaderSpec without defining it.
-///
-/// In the example above, the MultiKernelLoaderSpec instance created by the
-/// compiler can be used by the host code to create StreamExecutor kernel
-/// objects. In turn, those StreamExecutor kernel objects can be used by the
-/// host code to launch the kernel on the device as desired.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_KERNELSPEC_H
-#define STREAMEXECUTOR_KERNELSPEC_H
-
-#include <cassert>
-#include <functional>
-#include <map>
-#include <memory>
-#include <string>
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace streamexecutor {
-
-/// An object that knows how to find the code for a device kernel.
-///
-/// This is the base class for the hierarchy of loader specs. The different
-/// subclasses know how to find code in different formats (e.g. CUDA PTX, OpenCL
-/// binary).
-///
-/// This base class has functionality for storing and getting the name of the
-/// kernel as a string.
-class KernelLoaderSpec {
-public:
-  /// Returns the name of the kernel this spec loads.
-  const std::string &getKernelName() const { return KernelName; }
-
-protected:
-  explicit KernelLoaderSpec(llvm::StringRef KernelName);
-
-private:
-  std::string KernelName;
-
-  KernelLoaderSpec(const KernelLoaderSpec &) = delete;
-  KernelLoaderSpec &operator=(const KernelLoaderSpec &) = delete;
-};
-
-/// A KernelLoaderSpec for CUDA PTX code that resides in memory as a
-/// null-terminated string.
-class CUDAPTXInMemorySpec : public KernelLoaderSpec {
-public:
-  /// First component is major version, second component is minor version.
-  using ComputeCapability = std::pair<int, int>;
-
-  /// PTX code combined with its compute capability.
-  struct PTXSpec {
-    ComputeCapability TheComputeCapability;
-    const char *PTXCode;
-  };
-
-  /// Creates a CUDAPTXInMemorySpec from an array of PTXSpec objects.
-  ///
-  /// Adds each item in SpecList to this object.
-  ///
-  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
-  CUDAPTXInMemorySpec(
-      llvm::StringRef KernelName,
-      const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
-
-  /// Returns a pointer to the PTX code for the greatest compute capability not
-  /// exceeding the requested compute capability.
-  ///
-  /// Returns nullptr on failed lookup (if the requested version is not
-  /// available and no lower versions are available).
-  const char *getCode(int ComputeCapabilityMajor,
-                      int ComputeCapabilityMinor) const;
-
-private:
-  /// PTX code contents in memory.
-  ///
-  /// The key is a pair (cc_major, cc_minor), e.g., (2, 0), (3, 0), (3, 5).
-  std::map<ComputeCapability, const char *> PTXByComputeCapability;
-
-  CUDAPTXInMemorySpec(const CUDAPTXInMemorySpec &) = delete;
-  CUDAPTXInMemorySpec &operator=(const CUDAPTXInMemorySpec &) = delete;
-};
-
-/// A KernelLoaderSpec for CUDA fatbin code that resides in memory.
-class CUDAFatbinInMemorySpec : public KernelLoaderSpec {
-public:
-  /// Creates a CUDAFatbinInMemorySpec with a reference to the given fatbin
-  /// bytes.
-  ///
-  /// Does not take ownership of the Bytes pointer.
-  CUDAFatbinInMemorySpec(llvm::StringRef KernelName, const void *Bytes);
-
-  /// Gets the fatbin data bytes.
-  const void *getBytes() const { return Bytes; }
-
-private:
-  const void *Bytes;
-
-  CUDAFatbinInMemorySpec(const CUDAFatbinInMemorySpec &) = delete;
-  CUDAFatbinInMemorySpec &operator=(const CUDAFatbinInMemorySpec &) = delete;
-};
-
-/// A KernelLoaderSpec for OpenCL text that resides in memory as a
-/// null-terminated string.
-class OpenCLTextInMemorySpec : public KernelLoaderSpec {
-public:
-  /// Creates an OpenCLTextInMemorySpec with a reference to the given OpenCL
-  /// text code bytes.
-  ///
-  /// Does not take ownership of the Text pointer.
-  OpenCLTextInMemorySpec(llvm::StringRef KernelName, const char *Text);
-
-  /// Returns the OpenCL text contents.
-  const char *getText() const { return Text; }
-
-private:
-  const char *Text;
-
-  OpenCLTextInMemorySpec(const OpenCLTextInMemorySpec &) = delete;
-  OpenCLTextInMemorySpec &operator=(const OpenCLTextInMemorySpec &) = delete;
-};
-
-/// An object to store several different KernelLoaderSpecs for the same kernel.
-///
-/// This allows code in different formats and for different platforms to be
-/// stored all together for a single kernel.
-///
-/// Various methods are available to add a new KernelLoaderSpec to a
-/// MultiKernelLoaderSpec. There are also methods to query which formats and
-/// platforms are supported by the currently added KernelLoaderSpec objects, and
-/// methods to get the KernelLoaderSpec objects for each format and platform.
-///
-/// Since all stored KernelLoaderSpecs are supposed to reference the same
-/// kernel, they are all assumed to take the same number and type of parameters,
-/// but no checking is done to enforce this. In debug mode, all
-/// KernelLoaderSpecs are checked to make sure they have the same kernel name,
-/// so passing in specs with different kernel names can cause the program to
-/// abort.
-///
-/// This interface is prone to errors, so it is better to leave
-/// MultiKernelLoaderSpec creation and initialization to the compiler rather
-/// than doing it by hand.
-class MultiKernelLoaderSpec {
-public:
-  /// Type of functions used as host platform kernels.
-  using HostFunctionTy = std::function<void(const void **)>;
-
-  /// Returns the kernel name, or an empty string if no name has been set.
-  std::string getKernelName() const {
-    return TheKernelName ? *TheKernelName : std::string();
-  }
-
-  // Convenience getters for testing whether these platform variants have
-  // kernel loader specifications available.
-
-  bool hasCUDAPTXInMemory() const { return TheCUDAPTXInMemorySpec != nullptr; }
-  bool hasCUDAFatbinInMemory() const {
-    return TheCUDAFatbinInMemorySpec != nullptr;
-  }
-  bool hasOpenCLTextInMemory() const {
-    return TheOpenCLTextInMemorySpec != nullptr;
-  }
-  bool hasHostFunction() const { return HostFunction != nullptr; }
-
-  // Accessors for platform variant kernel load specifications.
-  //
-  // Precondition: corresponding has* method returns true.
-
-  const CUDAPTXInMemorySpec &getCUDAPTXInMemory() const {
-    assert(hasCUDAPTXInMemory() && "getting spec that is not present");
-    return *TheCUDAPTXInMemorySpec;
-  }
-  const CUDAFatbinInMemorySpec &getCUDAFatbinInMemory() const {
-    assert(hasCUDAFatbinInMemory() && "getting spec that is not present");
-    return *TheCUDAFatbinInMemorySpec;
-  }
-  const OpenCLTextInMemorySpec &getOpenCLTextInMemory() const {
-    assert(hasOpenCLTextInMemory() && "getting spec that is not present");
-    return *TheOpenCLTextInMemorySpec;
-  }
-  const HostFunctionTy &getHostFunction() const {
-    assert(hasHostFunction() && "getting spec that is not present");
-    return *HostFunction;
-  }
-
-  // Builder-pattern-like methods for use in initializing a
-  // MultiKernelLoaderSpec.
-  //
-  // Each of these should be used at most once for a single
-  // MultiKernelLoaderSpec object. See file comment for example usage.
-  //
-  // Note that the KernelName parameter must be consistent with the kernel in
-  // the PTX or OpenCL being loaded. Also be aware that in CUDA C++ the kernel
-  // name may be mangled by the compiler if it is not declared extern "C".
-
-  /// Does not take ownership of the PTXCode pointers in the SpecList elements.
-  MultiKernelLoaderSpec &
-  addCUDAPTXInMemory(llvm::StringRef KernelName,
-                     llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList);
-
-  /// Does not take ownership of the FatbinBytes pointer.
-  MultiKernelLoaderSpec &addCUDAFatbinInMemory(llvm::StringRef KernelName,
-                                               const void *FatbinBytes);
-
-  /// Does not take ownership of the OpenCLText pointer.
-  MultiKernelLoaderSpec &addOpenCLTextInMemory(llvm::StringRef KernelName,
-                                               const char *OpenCLText);
-
-  /// Stores a copy of Function as the host kernel and records KernelName.
-  MultiKernelLoaderSpec &addHostFunction(llvm::StringRef KernelName,
-                                         HostFunctionTy Function) {
-    setKernelName(KernelName);
-    HostFunction = llvm::make_unique<HostFunctionTy>(std::move(Function));
-    return *this;
-  }
-
-private:
-  void setKernelName(llvm::StringRef KernelName);
-
-  std::unique_ptr<std::string> TheKernelName;
-  std::unique_ptr<CUDAPTXInMemorySpec> TheCUDAPTXInMemorySpec;
-  std::unique_ptr<CUDAFatbinInMemorySpec> TheCUDAFatbinInMemorySpec;
-  std::unique_ptr<OpenCLTextInMemorySpec> TheOpenCLTextInMemorySpec;
-  std::unique_ptr<HostFunctionTy> HostFunction;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_KERNELSPEC_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h b/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h
deleted file mode 100644
index a88cbfc8cf9..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/LaunchDimensions.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//===-- LaunchDimensions.h - Kernel block and grid sizes --------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Structures to hold sizes for blocks and grids which are used as parameters
-/// for kernel launches.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_LAUNCHDIMENSIONS_H
-#define STREAMEXECUTOR_LAUNCHDIMENSIONS_H
-
-namespace streamexecutor {
-
-/// The dimensions of a device block of execution.
-///
-/// A block is made up of an array of X by Y by Z threads.
-struct BlockDimensions {
-  BlockDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1)
-      : X(X), Y(Y), Z(Z) {}
-
-  unsigned X;
-  unsigned Y;
-  unsigned Z;
-};
-
-/// The dimensions of a device grid of execution.
-///
-/// A grid is made up of an array of X by Y by Z blocks.
-struct GridDimensions {
-  GridDimensions(unsigned X = 1, unsigned Y = 1, unsigned Z = 1)
-      : X(X), Y(Y), Z(Z) {}
-
-  unsigned X;
-  unsigned Y;
-  unsigned Z;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_LAUNCHDIMENSIONS_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h b/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h
deleted file mode 100644
index f34ec67089f..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/PackedKernelArgumentArray.h
+++ /dev/null
@@ -1,234 +0,0 @@
-//===-- PackedKernelArgumentArray.h - Packed kernel arg types ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// The types in this file are designed to deal with the fact that device memory
-/// kernel arguments are treated differently from other arguments during kernel
-/// argument packing.
-///
-/// GlobalDeviceMemory<T> arguments are passed to a kernel by passing their
-/// opaque handle. SharedDeviceMemory<T> arguments have no associated address,
-/// only a size, so the size is the only information that gets passed to the
-/// kernel launch.
-///
-/// The KernelArgumentType enum is used to keep track of the type of each
-/// argument.
-///
-/// The PackedKernelArgumentArray class uses template metaprogramming to convert
-/// each argument to a PackedKernelArgument with minimal runtime overhead.
-///
-/// The design of the PackedKernelArgumentArray class has a few idiosyncrasies
-/// due to the fact that parameter packing has been identified as
-/// performance-critical in some applications. The packed argument data is
-/// stored as a struct of arrays rather than an array of structs because CUDA
-/// kernel launches in the CUDA driver API take an array of argument addresses.
-/// Having created the array of argument addresses here, no further work will
-/// need to be done in the CUDA driver layer to unpack and repack the addresses.
-///
-/// The shared memory argument count is maintained separately because in the
-/// common case where it is zero, the CUDA layer doesn't have to loop through
-/// the argument array and sum up all the shared memory sizes. This is another
-/// performance optimization that shows up as a quirk in this class interface.
-///
-/// The platform-interface kernel launch function will take the following
-/// arguments, which are provided by this interface:
-///   * argument count,
-///   * array of argument addresses,
-///   * array of argument sizes,
-///   * array of argument types, and
-///   * shared memory argument count.
-/// This information should be enough to allow any platform to launch the kernel
-/// efficiently, although it is probably more information than is needed for any
-/// specific platform.
-///
-/// The PackedKernelArgumentArrayBase class has no template parameters, so it
-/// does not benefit from compile-time type checking. However, since it has no
-/// template parameters, it can be passed as an argument to virtual functions,
-/// and this allows it to be passed to functions that use virtual function
-/// overriding to handle platform-specific kernel launching.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H
-#define STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H
-
-#include <array>
-
-#include "streamexecutor/DeviceMemory.h"
-
-namespace streamexecutor {
-
-enum class KernelArgumentType {
-  VALUE,                ///< Non-device-memory argument.
-  GLOBAL_DEVICE_MEMORY, ///< Non-shared device memory argument.
-  SHARED_DEVICE_MEMORY  ///< Shared device memory argument.
-};
-
-/// An array of packed kernel arguments without compile-time type information.
-///
-/// Packed kernel arguments must at some point be passed to a virtual function
-/// that performs platform-specific kernel launches. A virtual function cannot
-/// be templated over every PackedKernelArgumentArray<...> specialization, so
-/// such arrays are passed as references to this un-templated base class.
-class PackedKernelArgumentArrayBase {
-public:
-  virtual ~PackedKernelArgumentArrayBase();
-
-  /// Gets the number of packed arguments.
-  size_t getArgumentCount() const { return ArgumentCount; }
-
-  /// Gets the address of the argument at the given index (not bounds-checked).
-  const void *getAddress(size_t Index) const { return AddressesData[Index]; }
-
-  /// Gets the size of the argument at the given index (not bounds-checked).
-  size_t getSize(size_t Index) const { return SizesData[Index]; }
-
-  /// Gets the type of the argument at the given index (not bounds-checked).
-  KernelArgumentType getType(size_t Index) const { return TypesData[Index]; }
-
-  /// Gets a pointer to the address array.
-  const void *const *getAddresses() const { return AddressesData; }
-
-  /// Gets a pointer to the sizes array.
-  const size_t *getSizes() const { return SizesData; }
-
-  /// Gets a pointer to the types array.
-  const KernelArgumentType *getTypes() const { return TypesData; }
-
-  /// Gets the number of shared device memory arguments.
-  size_t getSharedCount() const { return SharedCount; }
-
-protected:
-  /// Starts with zero shared arguments and null array pointers.
-  PackedKernelArgumentArrayBase(size_t ArgumentCount)
-      : ArgumentCount(ArgumentCount), SharedCount(0u) {}
-
-  size_t ArgumentCount;
-  size_t SharedCount;
-  // Null until a subclass points these at the storage holding the packed data.
-  const void *const *AddressesData = nullptr;
-  size_t *SizesData = nullptr;
-  KernelArgumentType *TypesData = nullptr;
-};
-
-/// An array of packed kernel arguments with compile-time type information.
-///
-/// This is used by the platform-independent StreamExecutor code to pack
-/// arguments in a compile-time type-safe way. In order to actually launch a
-/// kernel on a specific platform, however, a reference to this class will have
-/// to be passed to a virtual, platform-specific kernel launch function. Such a
-/// reference will be passed as a reference to the base class rather than a
-/// reference to this subclass itself because a virtual function cannot be
-/// templated in such a way to maintain the template parameter types of the
-/// subclass.
-template <typename... ParameterTs>
-class PackedKernelArgumentArray : public PackedKernelArgumentArrayBase {
-public:
-  /// Packs the specified arguments. Prefer make_kernel_argument_pack, which
-  /// lets the compiler infer the parameter types.
-  ///
-  /// NOTE(review): only this constructor binds the base-class pointers to this
-  /// object's arrays; an implicit copy would keep the source's -- confirm.
-  PackedKernelArgumentArray(const ParameterTs &... Arguments)
-      : PackedKernelArgumentArrayBase(sizeof...(ParameterTs)) {
-    AddressesData = Addresses.data();
-    SizesData = Sizes.data();
-    TypesData = Types.data();
-    PackArguments(0, Arguments...);
-  }
-
-  ~PackedKernelArgumentArray() override = default;
-
-private:
-  // Base case for PackArguments when there are no arguments to pack.
-  void PackArguments(size_t) {}
-
-  // Induction step: packs the first argument at Index, then recurses.
-  template <typename T, typename... RemainingParameterTs>
-  void PackArguments(size_t Index, const T &Argument,
-                     const RemainingParameterTs &... RemainingArguments) {
-    PackOneArgument(Index, Argument);
-    PackArguments(Index + 1, RemainingArguments...);
-  }
-
-  // Pack a normal, non-device-memory argument (stores the argument's address).
-  template <typename T> void PackOneArgument(size_t Index, const T &Argument) {
-    Addresses[Index] = &Argument;
-    Sizes[Index] = sizeof(T);
-    Types[Index] = KernelArgumentType::VALUE;
-  }
-
-  // Pack a GlobalDeviceMemory<T> argument (stores its handle address).
-  template <typename T>
-  void PackOneArgument(size_t Index, const GlobalDeviceMemory<T> &Argument) {
-    Addresses[Index] = Argument.getHandleAddress();
-    Sizes[Index] = sizeof(void *);
-    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
-  }
-
-  // Pack a GlobalDeviceMemory<T> pointer argument.
-  template <typename T>
-  void PackOneArgument(size_t Index, GlobalDeviceMemory<T> *Argument) {
-    Addresses[Index] = Argument->getHandleAddress();
-    Sizes[Index] = sizeof(void *);
-    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
-  }
-
-  // Pack a const GlobalDeviceMemory<T> pointer argument.
-  template <typename T>
-  void PackOneArgument(size_t Index, const GlobalDeviceMemory<T> *Argument) {
-    Addresses[Index] = Argument->getHandleAddress();
-    Sizes[Index] = sizeof(void *);
-    Types[Index] = KernelArgumentType::GLOBAL_DEVICE_MEMORY;
-  }
-
-  // Pack a SharedDeviceMemory argument (null address; size in bytes only).
-  template <typename T>
-  void PackOneArgument(size_t Index, const SharedDeviceMemory<T> &Argument) {
-    ++SharedCount;
-    Addresses[Index] = nullptr;
-    Sizes[Index] = Argument.getElementCount() * sizeof(T);
-    Types[Index] = KernelArgumentType::SHARED_DEVICE_MEMORY;
-  }
-
-  // Pack a SharedDeviceMemory pointer argument.
-  template <typename T>
-  void PackOneArgument(size_t Index, SharedDeviceMemory<T> *Argument) {
-    ++SharedCount;
-    Addresses[Index] = nullptr;
-    Sizes[Index] = Argument->getElementCount() * sizeof(T);
-    Types[Index] = KernelArgumentType::SHARED_DEVICE_MEMORY;
-  }
-
-  // Pack a const SharedDeviceMemory pointer argument.
-  template <typename T>
-  void PackOneArgument(size_t Index, const SharedDeviceMemory<T> *Argument) {
-    ++SharedCount;
-    Addresses[Index] = nullptr;
-    Sizes[Index] = Argument->getElementCount() * sizeof(T);
-    Types[Index] = KernelArgumentType::SHARED_DEVICE_MEMORY;
-  }
-
-  std::array<const void *, sizeof...(ParameterTs)> Addresses;
-  std::array<size_t, sizeof...(ParameterTs)> Sizes;
-  std::array<KernelArgumentType, sizeof...(ParameterTs)> Types;
-};
-
-// Utility template function to call the PackedKernelArgumentArray constructor
-// with the template arguments matching the types of the arguments passed to
-// this function.
-template <typename... ParameterTs>
-PackedKernelArgumentArray<ParameterTs...>
-make_kernel_argument_pack(const ParameterTs &... Arguments) {
-  return PackedKernelArgumentArray<ParameterTs...>(Arguments...);
-}
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PACKEDKERNELARGUMENTARRAY_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Platform.h b/parallel-libs/streamexecutor/include/streamexecutor/Platform.h
deleted file mode 100644
index 8ced35d2066..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/Platform.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- Platform.h - The Platform class -------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// The Platform class which represents a platform such as CUDA or OpenCL.
-///
-/// This is an abstract base class that will be overridden by each specific
-/// platform.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORM_H
-#define STREAMEXECUTOR_PLATFORM_H
-
-#include "streamexecutor/Error.h"
-
-namespace streamexecutor {
-
-class Device;
-
-class Platform {
-public:
-  virtual ~Platform();
-
-  /// Gets the number of devices available for this platform.
-  virtual size_t getDeviceCount() const = 0;
-
-  /// Gets a Device with the given index for this platform.
-  virtual Expected<Device> getDevice(size_t DeviceIndex) = 0;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORM_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h
deleted file mode 100644
index 5b10e7067b1..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformDevice.h
+++ /dev/null
@@ -1,171 +0,0 @@
-//===-- PlatformDevice.h - PlatformDevice class -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Declaration of the PlatformDevice class.
-///
-/// Each specific platform such as CUDA or OpenCL must subclass PlatformDevice
-/// and override streamexecutor::Platform::getDevice to return an instance of
-/// their PlatformDevice subclass.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORMDEVICE_H
-#define STREAMEXECUTOR_PLATFORMDEVICE_H
-
-#include "streamexecutor/DeviceMemory.h"
-#include "streamexecutor/Error.h"
-#include "streamexecutor/Kernel.h"
-#include "streamexecutor/LaunchDimensions.h"
-#include "streamexecutor/PackedKernelArgumentArray.h"
-
-namespace streamexecutor {
-
-/// Raw executor methods that must be implemented by each platform.
-///
-/// The public Device and Stream classes have the type-safe versions of the
-/// functions in this interface.
-class PlatformDevice {
-public:
-  virtual ~PlatformDevice();
-
-  virtual std::string getName() const = 0;
-
-  virtual std::string getPlatformName() const = 0;
-
-  /// Creates a platform-specific kernel.
-  virtual Expected<const void *>
-  createKernel(const MultiKernelLoaderSpec &Spec) {
-    return make_error("createKernel not implemented for platform " +
-                      getPlatformName());
-  }
-
-  virtual Error destroyKernel(const void *Handle) {
-    return make_error("destroyKernel not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Creates a platform-specific stream.
-  virtual Expected<const void *> createStream() {
-    return make_error("createStream not implemented for platform " +
-                      getPlatformName());
-  }
-
-  virtual Error destroyStream(const void *Handle) {
-    return make_error("destroyStream not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Launches a kernel on the given stream.
-  virtual Error launch(const void *PlatformStreamHandle,
-                       BlockDimensions BlockSize, GridDimensions GridSize,
-                       const void *PKernelHandle,
-                       const PackedKernelArgumentArrayBase &ArgumentArray) {
-    return make_error("launch not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Copies data from the device to the host.
-  ///
-  /// HostDst should have been registered with registerHostMemory.
-  virtual Error copyD2H(const void *PlatformStreamHandle,
-                        const void *DeviceSrcHandle, size_t SrcByteOffset,
-                        void *HostDst, size_t DstByteOffset, size_t ByteCount) {
-    return make_error("copyD2H not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Copies data from the host to the device.
-  ///
-  /// HostSrc should have been registered with registerHostMemory.
-  virtual Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc,
-                        size_t SrcByteOffset, const void *DeviceDstHandle,
-                        size_t DstByteOffset, size_t ByteCount) {
-    return make_error("copyH2D not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Copies data from one device location to another.
-  virtual Error copyD2D(const void *PlatformStreamHandle,
-                        const void *DeviceSrcHandle, size_t SrcByteOffset,
-                        const void *DeviceDstHandle, size_t DstByteOffset,
-                        size_t ByteCount) {
-    return make_error("copyD2D not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Blocks the host until the given stream completes all the work enqueued up
-  /// to the point this function is called.
-  virtual Error blockHostUntilDone(const void *PlatformStreamHandle) {
-    return make_error("blockHostUntilDone not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Allocates untyped device memory of a given size in bytes.
-  virtual Expected<void *> allocateDeviceMemory(size_t ByteCount) {
-    return make_error("allocateDeviceMemory not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Frees device memory previously allocated by allocateDeviceMemory.
-  virtual Error freeDeviceMemory(const void *Handle) {
-    return make_error("freeDeviceMemory not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Registers previously allocated host memory so it can be used with copyH2D
-  /// and copyD2H.
-  virtual Error registerHostMemory(void *Memory, size_t ByteCount) {
-    return make_error("registerHostMemory not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Unregisters host memory previously registered with registerHostMemory.
-  virtual Error unregisterHostMemory(const void *Memory) {
-    return make_error("unregisterHostMemory not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Copies the given number of bytes from device memory to host memory.
-  ///
-  /// Blocks the calling host thread until the copy is completed. Can operate on
-  /// any host memory, not just registered host memory. Does not block any
-  /// ongoing device calls.
-  virtual Error synchronousCopyD2H(const void *DeviceSrcHandle,
-                                   size_t SrcByteOffset, void *HostDst,
-                                   size_t DstByteOffset, size_t ByteCount) {
-    return make_error("synchronousCopyD2H not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Similar to synchronousCopyD2H(const void *, size_t, void
-  /// *, size_t, size_t), but copies memory from host to device rather than
-  /// device to host.
-  virtual Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
-                                   const void *DeviceDstHandle,
-                                   size_t DstByteOffset, size_t ByteCount) {
-    return make_error("synchronousCopyH2D not implemented for platform " +
-                      getPlatformName());
-  }
-
-  /// Similar to synchronousCopyD2H(const void *, size_t, void
-  /// *, size_t, size_t), but copies memory from one location in device memory
-  /// to another rather than from device to host.
-  virtual Error synchronousCopyD2D(const void *DeviceSrcHandle,
-                                   size_t SrcByteOffset,
-                                   const void *DeviceDstHandle,
-                                   size_t DstByteOffset, size_t ByteCount) {
-    return make_error("synchronousCopyD2D not implemented for platform " +
-                      getPlatformName());
-  }
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORMDEVICE_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformManager.h b/parallel-libs/streamexecutor/include/streamexecutor/PlatformManager.h
deleted file mode 100644
index 7d0de12528d..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformManager.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- PlatformManager.h - The PlatformManager class -----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// PlatformManager is the entry point into the StreamExecutor API. A user
-/// begins be calling PlatformManager::getPlatformByName("cuda") where "cuda"
-/// can be replaced by any supported platform name. This gives the user a
-/// Platform object that can be used to create Device objects for that platform,
-/// etc.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORMMANAGER_H
-#define STREAMEXECUTOR_PLATFORMMANAGER_H
-
-#include <map>
-
-#include "streamexecutor/Error.h"
-#include "streamexecutor/Platform.h"
-
-namespace streamexecutor {
-
-/// A singleton that holds a reference to a Platform object for each
-/// supported StreamExecutor platform.
-class PlatformManager {
-public:
-  /// Gets a reference to the Platform with the given name.
-  ///
-  /// The name parameter is not case-sensitive, so the following arguments are
-  /// all equivalent: "cuda", "CUDA", "Cuda", "cUdA".
-  ///
-  /// Returns an error if no platform is present for the name.
-  ///
-  /// Ownership of the platform is NOT transferred to the caller.
-  static Expected<Platform *> getPlatformByName(llvm::StringRef Name);
-
-private:
-  PlatformManager();
-  PlatformManager(const PlatformManager &) = delete;
-  PlatformManager operator=(const PlatformManager &) = delete;
-
-  std::map<std::string, std::unique_ptr<Platform>> PlatformsByName;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORMMANAGER_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/PlatformOptions.h.in b/parallel-libs/streamexecutor/include/streamexecutor/PlatformOptions.h.in
deleted file mode 100644
index 2934dd428ea..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/PlatformOptions.h.in
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- PlatformOptions.h - Platform option macros --------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This contents of this file are filled in at configuration time. This file
-/// defines macros that represent the platform configuration state of the build,
-/// e.g. which platforms are enabled.
-///
-//===----------------------------------------------------------------------===//
-
-
-#ifndef STREAMEXECUTOR_PLATFORMOPTIONS_H
-#define STREAMEXECUTOR_PLATFORMOPTIONS_H
-
-#cmakedefine STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-
-#endif // STREAMEXECUTOR_PLATFORMOPTIONS_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h b/parallel-libs/streamexecutor/include/streamexecutor/Stream.h
deleted file mode 100644
index bdff7ff9701..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/Stream.h
+++ /dev/null
@@ -1,313 +0,0 @@
-//===-- Stream.h - A stream of execution ------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-///
-/// A Stream instance represents a queue of sequential, host-asynchronous work
-/// to be performed on a device.
-///
-/// To enqueue work on a device, first create a Device instance then use that
-/// Device to create a Stream instance. The Stream instance will perform its
-/// work on the device managed by the Device object that created it.
-///
-/// The various "then" methods of the Stream object, such as thenCopyH2D and
-/// thenLaunch, may be used to enqueue work on the Stream, and the
-/// blockHostUntilDone() method may be used to block the host code until the
-/// Stream has completed all its work.
-///
-/// Multiple Stream instances can be created for the same Device. This allows
-/// several independent streams of computation to be performed simultaneously on
-/// a single device.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_STREAM_H
-#define STREAMEXECUTOR_STREAM_H
-
-#include <cassert>
-#include <memory>
-#include <string>
-#include <type_traits>
-
-#include "streamexecutor/DeviceMemory.h"
-#include "streamexecutor/Error.h"
-#include "streamexecutor/HostMemory.h"
-#include "streamexecutor/Kernel.h"
-#include "streamexecutor/LaunchDimensions.h"
-#include "streamexecutor/PackedKernelArgumentArray.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/RWMutex.h"
-
-namespace streamexecutor {
-
-/// Represents a stream of dependent computations on a device.
-///
-/// The operations within a stream execute sequentially and asynchronously until
-/// blockHostUntilDone() is invoked, which synchronously joins host code with
-/// the execution of the stream.
-///
-/// If any given operation fails when entraining work for the stream, isOK()
-/// will indicate that an error has occurred and getStatus() will get the first
-/// error that occurred on the stream. There is no way to clear the error state
-/// of a stream once it is in an error state.
-class Stream {
-public:
-  Stream(PlatformDevice *D, const void *PlatformStreamHandle);
-
-  Stream(const Stream &Other) = delete;
-  Stream &operator=(const Stream &Other) = delete;
-
-  Stream(Stream &&Other) noexcept;
-  Stream &operator=(Stream &&Other) noexcept;
-
-  ~Stream();
-
-  /// Returns whether any error has occurred while entraining work on this
-  /// stream.
-  bool isOK() const {
-    llvm::sys::ScopedReader ReaderLock(*ErrorMessageMutex);
-    return !ErrorMessage;
-  }
-
-  /// Returns the status created by the first error that occurred while
-  /// entraining work on this stream.
-  Error getStatus() const {
-    llvm::sys::ScopedReader ReaderLock(*ErrorMessageMutex);
-    if (ErrorMessage)
-      return make_error(*ErrorMessage);
-    else
-      return Error::success();
-  }
-
-  // Blocks the calling host thread until all work enqueued on this Stream
-  // completes.
-  //
-  // Returns the result of getStatus() after the Stream work completes.
-  Error blockHostUntilDone() {
-    setError(PDevice->blockHostUntilDone(PlatformStreamHandle));
-    return getStatus();
-  }
-
-  /// Entrains onto the stream of operations a kernel launch with the given
-  /// arguments.
-  ///
-  /// These arguments can be device memory types like GlobalDeviceMemory<T> and
-  /// SharedDeviceMemory<T>, or they can be primitive types such as int. The
-  /// allowable argument types are determined by the template parameters to the
-  /// Kernel argument.
-  template <typename... ParameterTs>
-  Stream &thenLaunch(BlockDimensions BlockSize, GridDimensions GridSize,
-                     const Kernel<ParameterTs...> &K,
-                     const ParameterTs &... Arguments) {
-    auto ArgumentArray =
-        make_kernel_argument_pack<ParameterTs...>(Arguments...);
-    setError(PDevice->launch(PlatformStreamHandle, BlockSize, GridSize,
-                             K.getPlatformHandle(), ArgumentArray));
-    return *this;
-  }
-
-  /// \name Device memory copying functions
-  ///
-  /// These methods enqueue a device memory copy operation on the stream and
-  /// return without waiting for the operation to complete.
-  ///
-  /// The arguments and bounds checking for these methods match the API of the
-  /// \ref DeviceHostSyncCopyGroup
-  /// "host-synchronous device memory copying functions" of Device.
-  ///
-  /// The template types SrcTy and DstTy must match the following constraints:
-  ///   * Must define typename ElementTy (the type of element stored in the
-  ///   memory);
-  ///   * ElementTy for the source argument must be the same as ElementTy for
-  ///     the destination argument;
-  ///   * Must be convertible to the correct slice type:
-  ///     * GlobalDeviceMemorySlice<ElementTy> for device memory arguments,
-  ///     * RegisteredHostMemorySlice<ElementTy> for host memory source
-  ///       arguments,
-  ///     * MutableRegisteredHostMemorySlice<ElementT> for host memory
-  ///       destination arguments.
-  ///@{
-
-  // D2H
-
-  template <typename SrcTy, typename DstTy>
-  Stream &thenCopyD2H(SrcTy &&Src, DstTy &&Dst, size_t ElementCount) {
-    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy;
-    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy;
-    static_assert(std::is_same<SrcElemTy, DstElemTy>::value,
-                  "src/dst element type mismatch for thenCopyD2H");
-    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src);
-    MutableRegisteredHostMemorySlice<DstElemTy> DstSlice(Dst);
-    if (ElementCount > Src.getElementCount())
-      setError("copying too many elements, " + llvm::Twine(ElementCount) +
-               ", from a device array of element count " +
-               llvm::Twine(SrcSlice.getElementCount()));
-    else if (ElementCount > DstSlice.getElementCount())
-      setError("copying too many elements, " + llvm::Twine(ElementCount) +
-               ", to a host array of element count " +
-               llvm::Twine(DstSlice.getElementCount()));
-    else
-      setError(PDevice->copyD2H(
-          PlatformStreamHandle, SrcSlice.getBaseMemory().getHandle(),
-          SrcSlice.getElementOffset() * sizeof(SrcElemTy),
-          DstSlice.getPointer(), 0, ElementCount * sizeof(DstElemTy)));
-    return *this;
-  }
-
-  template <typename SrcTy, typename DstTy>
-  Stream &thenCopyD2H(SrcTy &&Src, DstTy &&Dst) {
-    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy;
-    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy;
-    static_assert(std::is_same<SrcElemTy, DstElemTy>::value,
-                  "src/dst element type mismatch for thenCopyD2H");
-    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src);
-    MutableRegisteredHostMemorySlice<DstElemTy> DstSlice(Dst);
-    if (SrcSlice.getElementCount() != DstSlice.getElementCount())
-      setError("array size mismatch for D2H, device source has element count " +
-               llvm::Twine(SrcSlice.getElementCount()) +
-               " but host destination has element count " +
-               llvm::Twine(DstSlice.getElementCount()));
-    else
-      thenCopyD2H(SrcSlice, DstSlice, SrcSlice.getElementCount());
-    return *this;
-  }
-
-  // H2D
-
-  template <typename SrcTy, typename DstTy>
-  Stream &thenCopyH2D(SrcTy &&Src, DstTy &&Dst, size_t ElementCount) {
-    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy;
-    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy;
-    static_assert(std::is_same<SrcElemTy, DstElemTy>::value,
-                  "src/dst element type mismatch for thenCopyH2D");
-    RegisteredHostMemorySlice<SrcElemTy> SrcSlice(Src);
-    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst);
-    if (ElementCount > SrcSlice.getElementCount())
-      setError("copying too many elements, " + llvm::Twine(ElementCount) +
-               ", from a host array of element count " +
-               llvm::Twine(SrcSlice.getElementCount()));
-    else if (ElementCount > DstSlice.getElementCount())
-      setError("copying too many elements, " + llvm::Twine(ElementCount) +
-               ", to a device array of element count " +
-               llvm::Twine(DstSlice.getElementCount()));
-    else
-      setError(PDevice->copyH2D(PlatformStreamHandle, SrcSlice.getPointer(), 0,
-                                DstSlice.getBaseMemory().getHandle(),
-                                DstSlice.getElementOffset() * sizeof(DstElemTy),
-                                ElementCount * sizeof(SrcElemTy)));
-    return *this;
-  }
-
-  template <typename SrcTy, typename DstTy>
-  Stream &thenCopyH2D(SrcTy &&Src, DstTy &&Dst) {
-    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy;
-    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy;
-    static_assert(std::is_same<SrcElemTy, DstElemTy>::value,
-                  "src/dst element type mismatch for thenCopyH2D");
-    RegisteredHostMemorySlice<SrcElemTy> SrcSlice(Src);
-    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst);
-    if (SrcSlice.getElementCount() != DstSlice.getElementCount())
-      setError("array size mismatch for H2D, host source has element count " +
-               llvm::Twine(SrcSlice.getElementCount()) +
-               " but device destination has element count " +
-               llvm::Twine(DstSlice.getElementCount()));
-    else
-      thenCopyH2D(SrcSlice, DstSlice, DstSlice.getElementCount());
-    return *this;
-  }
-
-  // D2D
-
-  template <typename SrcTy, typename DstTy>
-  Stream &thenCopyD2D(SrcTy &&Src, DstTy &&Dst, size_t ElementCount) {
-    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy;
-    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy;
-    static_assert(std::is_same<SrcElemTy, DstElemTy>::value,
-                  "src/dst element type mismatch for thenCopyD2D");
-    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src);
-    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst);
-    if (ElementCount > SrcSlice.getElementCount())
-      setError("copying too many elements, " + llvm::Twine(ElementCount) +
-               ", from a device array of element count " +
-               llvm::Twine(SrcSlice.getElementCount()));
-    else if (ElementCount > DstSlice.getElementCount())
-      setError("copying too many elements, " + llvm::Twine(ElementCount) +
-               ", to a device array of element count " +
-               llvm::Twine(DstSlice.getElementCount()));
-    else
-      setError(PDevice->copyD2D(PlatformStreamHandle,
-                                SrcSlice.getBaseMemory().getHandle(),
-                                SrcSlice.getElementOffset() * sizeof(SrcElemTy),
-                                DstSlice.getBaseMemory().getHandle(),
-                                DstSlice.getElementOffset() * sizeof(DstElemTy),
-                                ElementCount * sizeof(SrcElemTy)));
-    return *this;
-  }
-
-  template <typename SrcTy, typename DstTy>
-  Stream &thenCopyD2D(SrcTy &&Src, DstTy &&Dst) {
-    using SrcElemTy = typename std::remove_reference<SrcTy>::type::ElementTy;
-    using DstElemTy = typename std::remove_reference<DstTy>::type::ElementTy;
-    static_assert(std::is_same<SrcElemTy, DstElemTy>::value,
-                  "src/dst element type mismatch for thenCopyD2D");
-    GlobalDeviceMemorySlice<SrcElemTy> SrcSlice(Src);
-    GlobalDeviceMemorySlice<DstElemTy> DstSlice(Dst);
-    if (SrcSlice.getElementCount() != DstSlice.getElementCount())
-      setError("array size mismatch for D2D, device source has element count " +
-               llvm::Twine(SrcSlice.getElementCount()) +
-               " but device destination has element count " +
-               llvm::Twine(DstSlice.getElementCount()));
-    else
-      thenCopyD2D(SrcSlice, DstSlice, SrcSlice.getElementCount());
-    return *this;
-  }
-
-  ///@} End device memory copying functions
-
-private:
-  /// Sets the error state from an Error object.
-  ///
-  /// Does not overwrite the error if it is already set.
-  void setError(Error &&E) {
-    if (E) {
-      llvm::sys::ScopedWriter WriterLock(*ErrorMessageMutex);
-      if (!ErrorMessage)
-        ErrorMessage = consumeAndGetMessage(std::move(E));
-    }
-  }
-
-  /// Sets the error state from an error message.
-  ///
-  /// Does not overwrite the error if it is already set.
-  void setError(const llvm::Twine &Message) {
-    llvm::sys::ScopedWriter WriterLock(*ErrorMessageMutex);
-    if (!ErrorMessage)
-      ErrorMessage = Message.str();
-  }
-
-  /// The PlatformDevice that supports the operations of this stream.
-  PlatformDevice *PDevice;
-
-  /// The platform-specific stream handle for this instance.
-  const void *PlatformStreamHandle;
-
-  /// Mutex that guards the error state flags.
-  std::unique_ptr<llvm::sys::RWMutex> ErrorMessageMutex;
-
-  /// First error message for an operation in this stream or empty if there have
-  /// been no errors.
-  llvm::Optional<std::string> ErrorMessage;
-};
-
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_STREAM_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/StreamExecutor.h b/parallel-libs/streamexecutor/include/streamexecutor/StreamExecutor.h
deleted file mode 100644
index 942cd325f85..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/StreamExecutor.h
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- StreamExecutor.h - Main include file for StreamExecutor -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-/// \mainpage Welcome to StreamExecutor
-///
-/// \section Introduction
-/// \b StreamExecutor is a wrapper around CUDA and OpenCL host-side programming
-/// models (runtimes). This abstraction cleanly permits host code to target
-/// either CUDA or OpenCL devices with identically-functioning data parallel
-/// kernels. It manages the execution of concurrent work targeting the
-/// accelerator, similar to a host-side Executor.
-///
-/// This version of StreamExecutor can be built either as a sub-project of the
-/// LLVM project or as a standalone project depending on LLVM as an external
-/// package.
-///
-/// \subsection ExampleUsage Example Usage
-/// Below is an example of the use of the StreamExecutor API:
-///
-/// \snippet examples/CUDASaxpy.cpp Example saxpy host main
-///
-/// In the example, a couple of handler functions, \c getOrDie and \c
-/// dieIfError, are used to handle error return values in the StreamExecutor
-/// API. These functions are provided by StreamExecutor for quick-and-dirty
-/// error handling, but real applications will likely want to define their own
-/// versions of these handlers so that errors are handled more gracefully than
-/// just exiting the program.
-///
-/// \subsection CompilerGeneratedCode Compiler-Generated Code
-///
-/// The example also references some symbols from a compiler-generated
-/// namespace:
-///
-/// \snippet examples/CUDASaxpy.cpp Example saxpy compiler-generated
-///
-/// Instead of depending on the compiler to generate this code, you can
-/// technically write the code yourself, but this is not recommended because the
-/// code is very error-prone. For example, the template parameters for the
-/// Kernel specialization have to match the parameter types for the device
-/// kernel, and the MultiKernelLoaderSpec has to be initialized with valid
-/// device code for the kernel. Errors in this code will not show up until
-/// runtime, and may only show up as garbage output rather than an explicit
-/// error, which can be very hard to debug, so again, it is strongly advised not
-/// to write this code yourself.
-///
-/// The example compiler-generated code uses a PTX string in the source code to
-/// store the device code, but the device code can also be stored in other
-/// formats such as CUDA cubin and CUDA fatbin. Furthermore, device code can be
-/// stored for other platforms such as OpenCL, and StreamExecutor will pick the
-/// right device code at runtime based on the user's platform selection. See
-/// streamexecutor::MultiKernelLoaderSpec for details of how device code can be
-/// stored for different platforms, but again, the code to set up the
-/// MultiKernelLoaderSpec instance should be generated by the compiler if
-/// possible, not by the user.
-
-/// \example examples/CUDASaxpy.cpp
-/// Running saxpy on a CUDA device.
-
-#ifndef STREAMEXECUTOR_STREAMEXECUTOR_H
-#define STREAMEXECUTOR_STREAMEXECUTOR_H
-
-#include "Device.h"
-#include "Kernel.h"
-#include "KernelSpec.h"
-#include "Platform.h"
-#include "PlatformManager.h"
-#include "Stream.h"
-
-#endif // STREAMEXECUTOR_STREAMEXECUTOR_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h
deleted file mode 100644
index cbcd29af819..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatform.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===-- CUDAPlatform.h - CUDA platform subclass -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Declaration of the CUDAPlatform class.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H
-#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H
-
-#include "streamexecutor/Platform.h"
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-
-#include "llvm/Support/Mutex.h"
-
-#include <map>
-
-namespace streamexecutor {
-namespace cuda {
-
-class CUDAPlatform : public Platform {
-public:
-  size_t getDeviceCount() const override;
-
-  Expected<Device> getDevice(size_t DeviceIndex) override;
-
-private:
-  llvm::sys::Mutex Mutex;
-  std::map<size_t, CUDAPlatformDevice> PlatformDevices;
-};
-
-} // namespace cuda
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORM_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h
deleted file mode 100644
index b7c32985136..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/cuda/CUDAPlatformDevice.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===-- CUDAPlatformDevice.h - CUDAPlatformDevice class ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Declaration of the CUDAPlatformDevice class.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H
-#define STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H
-
-#include "streamexecutor/PlatformDevice.h"
-
-namespace streamexecutor {
-namespace cuda {
-
-Error CUresultToError(int CUResult, const llvm::Twine &Message);
-
-class CUDAPlatformDevice : public PlatformDevice {
-public:
-  static Expected<CUDAPlatformDevice> create(size_t DeviceIndex);
-
-  CUDAPlatformDevice(const CUDAPlatformDevice &) = delete;
-  CUDAPlatformDevice &operator=(const CUDAPlatformDevice &) = delete;
-
-  CUDAPlatformDevice(CUDAPlatformDevice &&) noexcept;
-  CUDAPlatformDevice &operator=(CUDAPlatformDevice &&) noexcept;
-
-  ~CUDAPlatformDevice() override;
-
-  std::string getName() const override;
-
-  std::string getPlatformName() const override { return "CUDA"; }
-
-  Expected<const void *>
-  createKernel(const MultiKernelLoaderSpec &Spec) override;
-  Error destroyKernel(const void *Handle) override;
-
-  Expected<const void *> createStream() override;
-  Error destroyStream(const void *Handle) override;
-
-  Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize,
-               GridDimensions GridSize, const void *PKernelHandle,
-               const PackedKernelArgumentArrayBase &ArgumentArray) override;
-
-  Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
-                size_t SrcByteOffset, void *HostDst, size_t DstByteOffset,
-                size_t ByteCount) override;
-
-  Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc,
-                size_t SrcByteOffset, const void *DeviceDstHandle,
-                size_t DstByteOffset, size_t ByteCount) override;
-
-  Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
-                size_t SrcByteOffset, const void *DeviceDstHandle,
-                size_t DstByteOffset, size_t ByteCount) override;
-
-  Error blockHostUntilDone(const void *PlatformStreamHandle) override;
-
-  Expected<void *> allocateDeviceMemory(size_t ByteCount) override;
-  Error freeDeviceMemory(const void *Handle) override;
-
-  Error registerHostMemory(void *Memory, size_t ByteCount) override;
-  Error unregisterHostMemory(const void *Memory) override;
-
-  Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset,
-                           void *HostDst, size_t DstByteOffset,
-                           size_t ByteCount) override;
-
-  Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
-                           const void *DeviceDstHandle, size_t DstByteOffset,
-                           size_t ByteCount) override;
-
-  Error synchronousCopyD2D(const void *DeviceDstHandle, size_t DstByteOffset,
-                           const void *DeviceSrcHandle, size_t SrcByteOffset,
-                           size_t ByteCount) override;
-
-private:
-  CUDAPlatformDevice(size_t DeviceIndex) : DeviceIndex(DeviceIndex) {}
-
-  int DeviceIndex;
-};
-
-} // namespace cuda
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORMS_CUDA_CUDAPLATFORMDEVICE_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h
deleted file mode 100644
index 338e3f6265a..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatform.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- HostPlatform.h - Host platform subclass -----------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Declaration of the HostPlatform class.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H
-#define STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H
-
-#include "HostPlatformDevice.h"
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Platform.h"
-
-#include "llvm/Support/Mutex.h"
-
-namespace streamexecutor {
-namespace host {
-
-/// Platform that performs work on the host rather than offloading to an
-/// accelerator.
-class HostPlatform : public Platform {
-public:
-  size_t getDeviceCount() const override { return 1; }
-
-  Expected<Device> getDevice(size_t DeviceIndex) override {
-    if (DeviceIndex != 0) {
-      return make_error(
-          "Requested device index " + llvm::Twine(DeviceIndex) +
-          " from host platform which only supports device index 0");
-    }
-    llvm::sys::ScopedLock Lock(Mutex);
-    if (!ThePlatformDevice)
-      ThePlatformDevice = llvm::make_unique<HostPlatformDevice>();
-    return Device(ThePlatformDevice.get());
-  }
-
-private:
-  llvm::sys::Mutex Mutex;
-  std::unique_ptr<HostPlatformDevice> ThePlatformDevice;
-};
-
-} // namespace host
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORM_H
diff --git a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h b/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h
deleted file mode 100644
index d6655756450..00000000000
--- a/parallel-libs/streamexecutor/include/streamexecutor/platforms/host/HostPlatformDevice.h
+++ /dev/null
@@ -1,161 +0,0 @@
-//===-- HostPlatformDevice.h - HostPlatformDevice class ---------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Declaration of the HostPlatformDevice class.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H
-#define STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H
-
-#include <cstdlib>
-#include <cstring>
-
-#include "streamexecutor/PlatformDevice.h"
-
-namespace streamexecutor {
-namespace host {
-
-/// A concrete PlatformDevice subclass that performs its work on the host rather
-/// than offloading to an accelerator.
-class HostPlatformDevice : public PlatformDevice {
-public:
-  std::string getName() const override { return "host"; }
-
-  std::string getPlatformName() const override { return "host"; }
-
-  Expected<const void *>
-  createKernel(const MultiKernelLoaderSpec &Spec) override {
-    if (!Spec.hasHostFunction()) {
-      return make_error("no host implementation available for kernel " +
-                        Spec.getKernelName());
-    }
-    return static_cast<const void *>(&Spec.getHostFunction());
-  }
-
-  Error destroyKernel(const void *Handle) override { return Error::success(); }
-
-  Expected<const void *> createStream() override {
-    // TODO(jhen): Do something with threads to allow multiple streams.
-    return this;
-  }
-
-  Error destroyStream(const void *Handle) override { return Error::success(); }
-
-  Error launch(const void *PlatformStreamHandle, BlockDimensions BlockSize,
-               GridDimensions GridSize, const void *PKernelHandle,
-               const PackedKernelArgumentArrayBase &ArgumentArray) override {
-    // TODO(jhen): Can we do something with BlockSize and GridSize?
-    if (!(BlockSize.X == 1 && BlockSize.Y == 1 && BlockSize.Z == 1)) {
-      return make_error(
-          "Block dimensions were (" + llvm::Twine(BlockSize.X) + "," +
-          llvm::Twine(BlockSize.Y) + "," + llvm::Twine(BlockSize.Z) +
-          "), but only size (1,1,1) is permitted for this platform");
-    }
-    if (!(GridSize.X == 1 && GridSize.Y == 1 && GridSize.Z == 1)) {
-      return make_error(
-          "Grid dimensions were (" + llvm::Twine(GridSize.X) + "," +
-          llvm::Twine(GridSize.Y) + "," + llvm::Twine(GridSize.Z) +
-          "), but only size (1,1,1) is permitted for this platform");
-    }
-
-    (*static_cast<const std::function<void(const void *const *)> *>(
-        PKernelHandle))(ArgumentArray.getAddresses());
-    return Error::success();
-  }
-
-  Error copyD2H(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
-                size_t SrcByteOffset, void *HostDst, size_t DstByteOffset,
-                size_t ByteCount) override {
-    std::memcpy(offset(HostDst, DstByteOffset),
-                offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
-    return Error::success();
-  }
-
-  Error copyH2D(const void *PlatformStreamHandle, const void *HostSrc,
-                size_t SrcByteOffset, const void *DeviceDstHandle,
-                size_t DstByteOffset, size_t ByteCount) override {
-    std::memcpy(offset(DeviceDstHandle, DstByteOffset),
-                offset(HostSrc, SrcByteOffset), ByteCount);
-    return Error::success();
-  }
-
-  Error copyD2D(const void *PlatformStreamHandle, const void *DeviceSrcHandle,
-                size_t SrcByteOffset, const void *DeviceDstHandle,
-                size_t DstByteOffset, size_t ByteCount) override {
-    std::memcpy(offset(DeviceDstHandle, DstByteOffset),
-                offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
-    return Error::success();
-  }
-
-  Error blockHostUntilDone(const void *PlatformStreamHandle) override {
-    // All host operations are synchronous anyway.
-    return Error::success();
-  }
-
-  Expected<void *> allocateDeviceMemory(size_t ByteCount) override {
-    return std::malloc(ByteCount);
-  }
-
-  Error freeDeviceMemory(const void *Handle) override {
-    std::free(const_cast<void *>(Handle));
-    return Error::success();
-  }
-
-  Error registerHostMemory(void *Memory, size_t ByteCount) override {
-    return Error::success();
-  }
-
-  Error unregisterHostMemory(const void *Memory) override {
-    return Error::success();
-  }
-
-  Error synchronousCopyD2H(const void *DeviceSrcHandle, size_t SrcByteOffset,
-                           void *HostDst, size_t DstByteOffset,
-                           size_t ByteCount) override {
-    std::memcpy(offset(HostDst, DstByteOffset),
-                offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
-    return Error::success();
-  }
-
-  Error synchronousCopyH2D(const void *HostSrc, size_t SrcByteOffset,
-                           const void *DeviceDstHandle, size_t DstByteOffset,
-                           size_t ByteCount) override {
-    std::memcpy(offset(DeviceDstHandle, DstByteOffset),
-                offset(HostSrc, SrcByteOffset), ByteCount);
-    return Error::success();
-  }
-
-  Error synchronousCopyD2D(const void *DeviceSrcHandle, size_t SrcByteOffset,
-                           const void *DeviceDstHandle, size_t DstByteOffset,
-                           size_t ByteCount) override {
-    std::memcpy(offset(DeviceDstHandle, DstByteOffset),
-                offset(DeviceSrcHandle, SrcByteOffset), ByteCount);
-    return Error::success();
-  }
-
-  /// Gets the value at the given index from a GlobalDeviceMemory<T> instance
-  /// created by this class.
-  template <typename T>
-  static T getDeviceValue(const streamexecutor::GlobalDeviceMemory<T> &Memory,
-                          size_t Index) {
-    return static_cast<const T *>(Memory.getHandle())[Index];
-  }
-
-private:
-  static void *offset(const void *Base, size_t Offset) {
-    return const_cast<char *>(static_cast<const char *>(Base) + Offset);
-  }
-};
-
-} // namespace host
-} // namespace streamexecutor
-
-#endif // STREAMEXECUTOR_PLATFORMS_HOST_HOSTPLATFORMDEVICE_H
diff --git a/parallel-libs/streamexecutor/lib/CMakeLists.txt b/parallel-libs/streamexecutor/lib/CMakeLists.txt
deleted file mode 100644
index 6157654a97a..00000000000
--- a/parallel-libs/streamexecutor/lib/CMakeLists.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-# add_se_library(<name> <args>...)
-#
-# Creates library target <name> by forwarding <name> and every remaining
-# argument unchanged to add_llvm_library, then sets the target's FOLDER
-# property to "streamexecutor libraries".
-macro(add_se_library name)
-  add_llvm_library(${name} ${ARGN})
-  set_target_properties(${name} PROPERTIES FOLDER "streamexecutor libraries")
-endmacro(add_se_library)
-
-add_subdirectory(platforms)
-
-add_se_library(
-    streamexecutor
-    Device.cpp
-    DeviceMemory.cpp
-    Error.cpp
-    HostMemory.cpp
-    Kernel.cpp
-    KernelSpec.cpp
-    PackedKernelArgumentArray.cpp
-    Platform.cpp
-    PlatformDevice.cpp
-    PlatformManager.cpp
-    Stream.cpp
-    ${STREAM_EXECUTOR_CUDA_PLATFORM_TARGET_OBJECT}
-    LINK_LIBS
-    ${STREAM_EXECUTOR_LIBCUDA_LIBRARIES})
-
-install(TARGETS streamexecutor DESTINATION lib)
diff --git a/parallel-libs/streamexecutor/lib/Device.cpp b/parallel-libs/streamexecutor/lib/Device.cpp
deleted file mode 100644
index 2bed3e7be16..00000000000
--- a/parallel-libs/streamexecutor/lib/Device.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- Device.cpp - Device implementation --------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of Device class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Device.h"
-
-#include <cassert>
-
-#include "streamexecutor/PlatformDevice.h"
-#include "streamexecutor/Stream.h"
-
-#include "llvm/ADT/STLExtras.h"
-
-namespace streamexecutor {
-
-Device::Device(PlatformDevice *PDevice) : PDevice(PDevice) {}
-
-Device::~Device() = default;
-
-// Asks the platform device for a raw stream handle and wraps it in a Stream;
-// a failure from the platform is forwarded to the caller unchanged.
-Expected<Stream> Device::createStream() {
-  Expected<const void *> HandleOrError = PDevice->createStream();
-  if (HandleOrError)
-    return Stream(PDevice, *HandleOrError);
-  return HandleOrError.takeError();
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp b/parallel-libs/streamexecutor/lib/DeviceMemory.cpp
deleted file mode 100644
index 8447a60b1ca..00000000000
--- a/parallel-libs/streamexecutor/lib/DeviceMemory.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- DeviceMemory.cpp - DeviceMemory implementation --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of DeviceMemory class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/DeviceMemory.h"
-
-#include "streamexecutor/Device.h"
-
-namespace streamexecutor {
-
-GlobalDeviceMemoryBase::~GlobalDeviceMemoryBase() {
-  if (Handle)
-    // TODO(jhen): How to handle errors here.
-    consumeError(TheDevice->freeDeviceMemory(*this));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Error.cpp b/parallel-libs/streamexecutor/lib/Error.cpp
deleted file mode 100644
index 0d728fab669..00000000000
--- a/parallel-libs/streamexecutor/lib/Error.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- Error.cpp - Error handling ----------------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Types for returning recoverable errors.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Error.h"
-
-#include "llvm/ADT/StringRef.h"
-
-namespace {
-
-// An error with a string message describing the cause.
-class StreamExecutorError : public llvm::ErrorInfo<StreamExecutorError> {
-public:
-  StreamExecutorError(llvm::StringRef Message) : Message(Message.str()) {}
-
-  void log(llvm::raw_ostream &OS) const override { OS << Message; }
-
-  std::error_code convertToErrorCode() const override {
-    llvm_unreachable(
-        "StreamExecutorError does not support conversion to std::error_code");
-  }
-
-  std::string getErrorMessage() const { return Message; }
-
-  static char ID;
-
-private:
-  std::string Message;
-};
-
-char StreamExecutorError::ID = 0;
-
-} // namespace
-
-namespace streamexecutor {
-
-Error make_error(const Twine &Message) {
-  return llvm::make_error<StreamExecutorError>(Message.str());
-}
-
-/// Consumes E and returns a human-readable description of it.
-///
-/// \returns the literal "success" when E holds no error; otherwise the message
-///     stored in the StreamExecutorError carried by E.
-std::string consumeAndGetMessage(Error &&E) {
-  if (!E)
-    return "success";
-  std::string Message;
-  // NOTE(review): only a StreamExecutorError handler is registered here; what
-  // happens for any other error type is decided by llvm::handleAllErrors --
-  // confirm that callers never pass errors created outside this library.
-  llvm::handleAllErrors(std::move(E),
-                        [&Message](const StreamExecutorError &SEE) {
-                          Message = SEE.getErrorMessage();
-                        });
-  return Message;
-}
-
-void dieIfError(Error &&E) {
-  if (E) {
-    std::fprintf(stderr, "Error encountered: %s.\n",
-                 streamexecutor::consumeAndGetMessage(std::move(E)).c_str());
-    std::exit(EXIT_FAILURE);
-  }
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/HostMemory.cpp b/parallel-libs/streamexecutor/lib/HostMemory.cpp
deleted file mode 100644
index 8eba7e6b563..00000000000
--- a/parallel-libs/streamexecutor/lib/HostMemory.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- HostMemory.cpp - HostMemory implementation ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of HostMemory internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/HostMemory.h"
-#include "streamexecutor/Device.h"
-
-namespace streamexecutor {
-namespace internal {
-
-void destroyRegisteredHostMemoryInternals(Device *TheDevice, void *Pointer) {
-  // TODO(jhen): How to handle errors here?
-  if (Pointer)
-    consumeError(TheDevice->unregisterHostMemory(Pointer));
-}
-
-} // namespace internal
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Kernel.cpp b/parallel-libs/streamexecutor/lib/Kernel.cpp
deleted file mode 100644
index 911ac6656aa..00000000000
--- a/parallel-libs/streamexecutor/lib/Kernel.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- Kernel.cpp - General kernel implementation ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for kernel types.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Kernel.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "llvm/DebugInfo/Symbolize/Symbolize.h"
-
-namespace streamexecutor {
-
-KernelBase::KernelBase(PlatformDevice *D, const void *PlatformKernelHandle,
-                       llvm::StringRef Name)
-    : PDevice(D), PlatformKernelHandle(PlatformKernelHandle), Name(Name),
-      DemangledName(
-          llvm::symbolize::LLVMSymbolizer::DemangleName(Name, nullptr)) {
-  assert(D != nullptr &&
-         "cannot construct a kernel object with a null platform device");
-  assert(PlatformKernelHandle != nullptr &&
-         "cannot construct a kernel object with a null platform kernel handle");
-}
-
-KernelBase::KernelBase(KernelBase &&Other) noexcept
-    : PDevice(Other.PDevice), PlatformKernelHandle(Other.PlatformKernelHandle),
-      Name(std::move(Other.Name)),
-      DemangledName(std::move(Other.DemangledName)) {
-  Other.PDevice = nullptr;
-  Other.PlatformKernelHandle = nullptr;
-}
-
-// Move-assignment: takes over Other's platform device and kernel handle and
-// leaves Other empty (null device and handle), so Other's destructor does
-// nothing.
-KernelBase &KernelBase::operator=(KernelBase &&Other) noexcept {
-  // Self-move must be a no-op; otherwise the handle would be nulled out below.
-  if (this == &Other)
-    return *this;
-
-  // Destroy the kernel currently owned by this object before overwriting the
-  // handle, mirroring the destructor; otherwise it would be leaked.
-  if (PlatformKernelHandle)
-    // TODO(jhen): Handle the error here.
-    consumeError(PDevice->destroyKernel(PlatformKernelHandle));
-
-  PDevice = Other.PDevice;
-  PlatformKernelHandle = Other.PlatformKernelHandle;
-  Name = std::move(Other.Name);
-  DemangledName = std::move(Other.DemangledName);
-  Other.PDevice = nullptr;
-  Other.PlatformKernelHandle = nullptr;
-  return *this;
-}
-
-KernelBase::~KernelBase() {
-  if (PlatformKernelHandle)
-    // TODO(jhen): Handle the error here.
-    consumeError(PDevice->destroyKernel(PlatformKernelHandle));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/KernelSpec.cpp b/parallel-libs/streamexecutor/lib/KernelSpec.cpp
deleted file mode 100644
index 951ea8fc41c..00000000000
--- a/parallel-libs/streamexecutor/lib/KernelSpec.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-//===-- KernelSpec.cpp - General kernel spec implementation ---------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for kernel loader specs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/KernelSpec.h"
-
-#include "llvm/ADT/STLExtras.h"
-
-namespace streamexecutor {
-
-KernelLoaderSpec::KernelLoaderSpec(llvm::StringRef KernelName)
-    : KernelName(KernelName) {}
-
-CUDAPTXInMemorySpec::CUDAPTXInMemorySpec(
-    llvm::StringRef KernelName,
-    const llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList)
-    : KernelLoaderSpec(KernelName) {
-  for (const auto &Spec : SpecList)
-    PTXByComputeCapability.emplace(Spec.TheComputeCapability, Spec.PTXCode);
-}
-
-// Looks up the PTX code registered for the given compute capability.
-// Returns nullptr when no registered entry precedes the upper bound of the
-// requested capability; otherwise returns the code of the entry immediately
-// before that upper bound.
-const char *CUDAPTXInMemorySpec::getCode(int ComputeCapabilityMajor,
-                                         int ComputeCapabilityMinor) const {
-  const CUDAPTXInMemorySpec::ComputeCapability Requested{
-      ComputeCapabilityMajor, ComputeCapabilityMinor};
-  auto FirstGreater = PTXByComputeCapability.upper_bound(Requested);
-  if (FirstGreater == PTXByComputeCapability.begin())
-    return nullptr;
-  // Step back to the entry just before the upper bound.
-  --FirstGreater;
-  return FirstGreater->second;
-}
-
-CUDAFatbinInMemorySpec::CUDAFatbinInMemorySpec(llvm::StringRef KernelName,
-                                               const void *Bytes)
-    : KernelLoaderSpec(KernelName), Bytes(Bytes) {}
-
-OpenCLTextInMemorySpec::OpenCLTextInMemorySpec(llvm::StringRef KernelName,
-                                               const char *Text)
-    : KernelLoaderSpec(KernelName), Text(Text) {}
-
-void MultiKernelLoaderSpec::setKernelName(llvm::StringRef KernelName) {
-  if (TheKernelName)
-    assert(KernelName.equals(*TheKernelName) &&
-           "different kernel names in one MultiKernelLoaderSpec");
-  else
-    TheKernelName = llvm::make_unique<std::string>(KernelName);
-}
-
-MultiKernelLoaderSpec &MultiKernelLoaderSpec::addCUDAPTXInMemory(
-    llvm::StringRef KernelName,
-    llvm::ArrayRef<CUDAPTXInMemorySpec::PTXSpec> SpecList) {
-  assert((TheCUDAPTXInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  TheCUDAPTXInMemorySpec =
-      llvm::make_unique<CUDAPTXInMemorySpec>(KernelName, SpecList);
-  return *this;
-}
-
-MultiKernelLoaderSpec &
-MultiKernelLoaderSpec::addCUDAFatbinInMemory(llvm::StringRef KernelName,
-                                             const void *Bytes) {
-  assert((TheCUDAFatbinInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  TheCUDAFatbinInMemorySpec =
-      llvm::make_unique<CUDAFatbinInMemorySpec>(KernelName, Bytes);
-  return *this;
-}
-
-MultiKernelLoaderSpec &
-MultiKernelLoaderSpec::addOpenCLTextInMemory(llvm::StringRef KernelName,
-                                             const char *OpenCLText) {
-  assert((TheOpenCLTextInMemorySpec == nullptr) &&
-         "illegal loader spec overwrite");
-  setKernelName(KernelName);
-  TheOpenCLTextInMemorySpec =
-      llvm::make_unique<OpenCLTextInMemorySpec>(KernelName, OpenCLText);
-  return *this;
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp b/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp
deleted file mode 100644
index 04ac80d74ed..00000000000
--- a/parallel-libs/streamexecutor/lib/PackedKernelArgumentArray.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- PackedKernelArgumentArray.cpp - Packed argument array impl --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation details for classes from PackedKernelArgumentArray.h.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PackedKernelArgumentArray.h"
-
-namespace streamexecutor {
-
-PackedKernelArgumentArrayBase::~PackedKernelArgumentArrayBase() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Platform.cpp b/parallel-libs/streamexecutor/lib/Platform.cpp
deleted file mode 100644
index 4250468a022..00000000000
--- a/parallel-libs/streamexecutor/lib/Platform.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- Platform.cpp - Platform implementation ----------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of Platform class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Platform.h"
-
-namespace streamexecutor {
-
-Platform::~Platform() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp b/parallel-libs/streamexecutor/lib/PlatformDevice.cpp
deleted file mode 100644
index 8dd44a3a4aa..00000000000
--- a/parallel-libs/streamexecutor/lib/PlatformDevice.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- PlatformDevice.cpp - Platform interface implementations -----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation file for PlatformDevice.h.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PlatformDevice.h"
-
-namespace streamexecutor {
-
-PlatformDevice::~PlatformDevice() = default;
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/PlatformManager.cpp b/parallel-libs/streamexecutor/lib/PlatformManager.cpp
deleted file mode 100644
index 8f44befbd71..00000000000
--- a/parallel-libs/streamexecutor/lib/PlatformManager.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- PlatformManager.cpp - PlatformManager implementation --------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of PlatformManager class internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/PlatformManager.h"
-
-#include "streamexecutor/PlatformOptions.h"
-#include "streamexecutor/platforms/host/HostPlatform.h"
-
-#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
-#endif
-
-namespace streamexecutor {
-
-PlatformManager::PlatformManager() {
-  // TODO(jhen): Register known platforms by name.
-  // We have a couple of options here:
-  //  * Use build-system flags to set preprocessor macros that select the
-  //    appropriate code to include here.
-  //  * Use static initialization tricks to have platform libraries register
-  //    themselves when they are loaded.
-
-  PlatformsByName.emplace("host", llvm::make_unique<host::HostPlatform>());
-
-#ifdef STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM
-  PlatformsByName.emplace("cuda", llvm::make_unique<cuda::CUDAPlatform>());
-#endif
-}
-
-// Returns the platform registered under the lower-cased form of Name, or an
-// error when no platform is registered under that name.
-Expected<Platform *> PlatformManager::getPlatformByName(llvm::StringRef Name) {
-  // The manager (and with it the platform registry) is a function-local
-  // static, constructed the first time this function runs.
-  static PlatformManager Instance;
-  auto &Registry = Instance.PlatformsByName;
-  auto Found = Registry.find(Name.lower());
-  if (Found == Registry.end())
-    return make_error("no available platform with name " + Name);
-  return Found->second.get();
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/Stream.cpp b/parallel-libs/streamexecutor/lib/Stream.cpp
deleted file mode 100644
index fe135b4d0af..00000000000
--- a/parallel-libs/streamexecutor/lib/Stream.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- Stream.cpp - General stream implementation ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the implementation details for a general stream object.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cassert>
-
-#include "streamexecutor/Stream.h"
-
-namespace streamexecutor {
-
-Stream::Stream(PlatformDevice *D, const void *PlatformStreamHandle)
-    : PDevice(D), PlatformStreamHandle(PlatformStreamHandle),
-      ErrorMessageMutex(llvm::make_unique<llvm::sys::RWMutex>()) {
-  assert(D != nullptr &&
-         "cannot construct a stream object with a null platform device");
-  assert(PlatformStreamHandle != nullptr &&
-         "cannot construct a stream object with a null platform stream handle");
-}
-
-Stream::Stream(Stream &&Other) noexcept
-    : PDevice(Other.PDevice), PlatformStreamHandle(Other.PlatformStreamHandle),
-      ErrorMessageMutex(std::move(Other.ErrorMessageMutex)),
-      ErrorMessage(std::move(Other.ErrorMessage)) {
-  Other.PDevice = nullptr;
-  Other.PlatformStreamHandle = nullptr;
-}
-
-// Move-assignment: takes over Other's platform device, stream handle, mutex
-// and error message, and leaves Other empty (null device and handle), so
-// Other's destructor does nothing.
-Stream &Stream::operator=(Stream &&Other) noexcept {
-  // Self-move must be a no-op; otherwise the handle would be nulled out below.
-  if (this == &Other)
-    return *this;
-
-  // Destroy the platform stream currently owned by this object before
-  // overwriting the handle, mirroring the destructor; otherwise it would be
-  // leaked.
-  if (PlatformStreamHandle)
-    // TODO(jhen): Handle error condition here.
-    consumeError(PDevice->destroyStream(PlatformStreamHandle));
-
-  PDevice = Other.PDevice;
-  PlatformStreamHandle = Other.PlatformStreamHandle;
-  ErrorMessageMutex = std::move(Other.ErrorMessageMutex);
-  ErrorMessage = std::move(Other.ErrorMessage);
-  Other.PDevice = nullptr;
-  Other.PlatformStreamHandle = nullptr;
-  return *this;
-}
-
-Stream::~Stream() {
-  if (PlatformStreamHandle)
-    // TODO(jhen): Handle error condition here.
-    consumeError(PDevice->destroyStream(PlatformStreamHandle));
-}
-
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
deleted file mode 100644
index 0802c059add..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
-    add_subdirectory(cuda)
-endif()
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt b/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt
deleted file mode 100644
index 5be76d1c75a..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-add_library(
-    streamexecutor_cuda_platform
-    OBJECT
-    CUDAPlatform.cpp
-    CUDAPlatformDevice.cpp)
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp
deleted file mode 100644
index 9f9e4388647..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatform.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-//===-- CUDAPlatform.cpp - CUDA platform implementation -------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of CUDA platform internals.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/platforms/cuda/CUDAPlatform.h"
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Platform.h"
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-
-#include "llvm/Support/Mutex.h"
-
-#include "cuda.h"
-
-#include <map>
-
-namespace streamexecutor {
-namespace cuda {
-
-// Runs cuInit(0) the first time this function is called and returns that same
-// cached result on every call.
-static CUresult ensureCUDAInitialized() {
-  static const CUresult CachedInitResult = cuInit(0);
-  return CachedInitResult;
-}
-
-// Returns the number of CUDA devices reported by the driver, or 0 when either
-// driver initialization or the device-count query fails.
-size_t CUDAPlatform::getDeviceCount() const {
-  if (ensureCUDAInitialized())
-    // TODO(jhen): Log an error.
-    return 0;
-
-  int DeviceCount = 0;
-  // Do not trust DeviceCount if the query itself failed; report no devices.
-  if (cuDeviceGetCount(&DeviceCount))
-    // TODO(jhen): Log an error.
-    return 0;
-
-  return DeviceCount;
-}
-
-/// Returns a Device wrapping the platform device for DeviceIndex.
-///
-/// The platform device is created on the first request for an index and kept
-/// in PlatformDevices; later requests for the same index reuse that entry.
-///
-/// \returns an error if the cached cuInit result is a failure or if
-///     CUDAPlatformDevice::create fails.
-Expected<Device> CUDAPlatform::getDevice(size_t DeviceIndex) {
-  if (CUresult InitResult = ensureCUDAInitialized())
-    return CUresultToError(InitResult, "cached cuInit return value");
-
-  // Lookup and insertion into PlatformDevices happen under Mutex.
-  llvm::sys::ScopedLock Lock(Mutex);
-  auto Iterator = PlatformDevices.find(DeviceIndex);
-  if (Iterator == PlatformDevices.end()) {
-    if (auto MaybePDevice = CUDAPlatformDevice::create(DeviceIndex)) {
-      Iterator =
-          PlatformDevices.emplace(DeviceIndex, std::move(*MaybePDevice)).first;
-    } else {
-      return MaybePDevice.takeError();
-    }
-  }
-  // The Device receives a pointer to the element stored in PlatformDevices.
-  // NOTE(review): this relies on the container never relocating its elements
-  // -- confirm the container type declared in the header.
-  return Device(&Iterator->second);
-}
-
-} // namespace cuda
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp b/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp
deleted file mode 100644
index 5284a9a0a35..00000000000
--- a/parallel-libs/streamexecutor/lib/platforms/cuda/CUDAPlatformDevice.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-//===-- CUDAPlatformDevice.cpp - CUDAPlatformDevice implementation --------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Implementation of CUDAPlatformDevice.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/platforms/cuda/CUDAPlatformDevice.h"
-#include "streamexecutor/PlatformDevice.h"
-
-#include "cuda.h"
-
-namespace streamexecutor {
-namespace cuda {
-
-// Returns Base advanced by Offset bytes, as a mutable void pointer.
-static void *offset(const void *Base, size_t Offset) {
-  const char *Bytes = static_cast<const char *>(Base);
-  return const_cast<char *>(Bytes + Offset);
-}
-
-/// Converts a CUDA driver result code into an Error.
-///
-/// \param CUResult result code of a CUDA driver call, passed as an int and
-///     cast to CUresult here.
-/// \param Message text embedded in the generated error message.
-/// \returns Error::success() when the code is zero; otherwise an error whose
-///     message contains Message, the numeric code, and the name and
-///     description reported by the driver for that code.
-Error CUresultToError(int CUResult, const llvm::Twine &Message) {
-  CUresult Result = static_cast<CUresult>(CUResult);
-  if (Result) {
-    // If the driver cannot supply a name or description for the code, fall
-    // back to a fixed placeholder string.
-    const char *ErrorName;
-    if (cuGetErrorName(Result, &ErrorName))
-      ErrorName = "UNKNOWN ERROR NAME";
-    const char *ErrorString;
-    if (cuGetErrorString(Result, &ErrorString))
-      ErrorString = "UNKNOWN ERROR DESCRIPTION";
-    return make_error("CUDA driver error: '" + Message + "', error code = " +
-                      llvm::Twine(static_cast<int>(Result)) + ", name = " +
-                      ErrorName + ", description = '" + ErrorString + "'");
-  } else
-    return Error::success();
-}
-
-// Returns "CUDA device <index>", followed by ": <driver name>" when the driver
-// can report a name for this device.
-//
-// The name is built on every call instead of being cached in a function-local
-// static: such a static is shared by all CUDAPlatformDevice instances, so
-// every device would report the name of whichever device was queried first.
-std::string CUDAPlatformDevice::getName() const {
-  static constexpr size_t MAX_DRIVER_NAME_BYTES = 1024;
-  const int Index = static_cast<int>(DeviceIndex);
-  std::string Name = "CUDA device " + std::to_string(Index);
-  char NameFromDriver[MAX_DRIVER_NAME_BYTES];
-  if (!cuDeviceGetName(NameFromDriver, MAX_DRIVER_NAME_BYTES - 1, Index)) {
-    // Force termination in case the driver filled the whole buffer.
-    NameFromDriver[MAX_DRIVER_NAME_BYTES - 1] = '\0';
-    Name.append(": ").append(NameFromDriver);
-  }
-  return Name;
-}
-
-// Creates the platform device for DeviceIndex: looks up the driver device,
-// retains its primary context and makes that context current.
-//
-// Returns an error naming the failing driver call if any step fails.
-Expected<CUDAPlatformDevice> CUDAPlatformDevice::create(size_t DeviceIndex) {
-  CUdevice DeviceHandle;
-  if (CUresult Result = cuDeviceGet(&DeviceHandle, DeviceIndex))
-    return CUresultToError(Result, "cuDeviceGet");
-
-  CUcontext ContextHandle;
-  if (CUresult Result = cuDevicePrimaryCtxRetain(&ContextHandle, DeviceHandle))
-    return CUresultToError(Result, "cuDevicePrimaryCtxRetain");
-
-  if (CUresult Result = cuCtxSetCurrent(ContextHandle)) {
-    // No CUDAPlatformDevice will exist to release the context retained above,
-    // so release it here; the original failure is what gets reported.
-    (void)cuDevicePrimaryCtxRelease(DeviceHandle);
-    return CUresultToError(Result, "cuCtxSetCurrent");
-  }
-
-  return CUDAPlatformDevice(DeviceIndex);
-}
-
-CUDAPlatformDevice::CUDAPlatformDevice(CUDAPlatformDevice &&Other) noexcept
-    : DeviceIndex(Other.DeviceIndex) {
-  Other.DeviceIndex = -1;
-}
-
-CUDAPlatformDevice &CUDAPlatformDevice::
-operator=(CUDAPlatformDevice &&Other) noexcept {
-  DeviceIndex = Other.DeviceIndex;
-  Other.DeviceIndex = -1;
-  return *this;
-}
-
-// Releases the primary context of the device this object refers to.
-CUDAPlatformDevice::~CUDAPlatformDevice() {
-  // The move constructor and move assignment set DeviceIndex to -1 in the
-  // moved-from object; such an object owns nothing, so skip the driver call
-  // instead of passing it an invalid device.
-  if (DeviceIndex == static_cast<decltype(DeviceIndex)>(-1))
-    return;
-  CUresult Result = cuDevicePrimaryCtxRelease(DeviceIndex);
-  (void)Result;
-  // TODO(jhen): Log error.
-}
-
-// Loads the kernel described by Spec onto this device.
-//
-// Queries the device's compute capability, asks Spec's in-memory PTX spec for
-// matching code, loads that code as a module and looks up the function named
-// by Spec.getKernelName().
-//
-// Returns the CUfunction as an opaque handle, or an error if Spec has no
-// in-memory PTX, no code is available for this compute capability, or a
-// driver call fails.
-Expected<const void *>
-CUDAPlatformDevice::createKernel(const MultiKernelLoaderSpec &Spec) {
-  // TODO(jhen): Maybe first check loaded modules?
-  if (!Spec.hasCUDAPTXInMemory())
-    return make_error("no CUDA code available to create kernel");
-
-  CUdevice Device = static_cast<int>(DeviceIndex);
-  int ComputeCapabilityMajor = 0;
-  int ComputeCapabilityMinor = 0;
-  if (CUresult Result = cuDeviceGetAttribute(
-          &ComputeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
-          Device))
-    return CUresultToError(
-        Result,
-        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR");
-  // The minor version has its own attribute; querying the MAJOR attribute
-  // again would make every device look like capability N.N.
-  if (CUresult Result = cuDeviceGetAttribute(
-          &ComputeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
-          Device))
-    return CUresultToError(
-        Result,
-        "cuDeviceGetAttribute CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR");
-  const char *Code = Spec.getCUDAPTXInMemory().getCode(ComputeCapabilityMajor,
-                                                       ComputeCapabilityMinor);
-
-  if (!Code)
-    return make_error("no suitable CUDA source found for compute capability " +
-                      llvm::Twine(ComputeCapabilityMajor) + "." +
-                      llvm::Twine(ComputeCapabilityMinor));
-
-  CUmodule Module;
-  if (CUresult Result = cuModuleLoadData(&Module, Code))
-    return CUresultToError(Result, "cuModuleLoadData");
-
-  CUfunction Function;
-  if (CUresult Result =
-          cuModuleGetFunction(&Function, Module, Spec.getKernelName().c_str()))
-    return CUresultToError(Result, "cuModuleGetFunction");
-
-  // TODO(jhen): Should I save this function pointer in case someone asks for
-  // it again?
-
-  // TODO(jhen): Should I save the module pointer so I can unload it when I
-  // destroy this device?
-
-  return static_cast<const void *>(Function);
-}
-
-Error CUDAPlatformDevice::destroyKernel(const void *Handle) {
-  // TODO(jhen): Maybe keep track of kernels for each module and unload the
-  // module after they are all destroyed.
-  return Error::success();
-}
-
-Expected<const void *> CUDAPlatformDevice::createStream() {
-  CUstream Stream;
-  if (CUresult Result = cuStreamCreate(&Stream, CU_STREAM_DEFAULT))
-    return CUresultToError(Result, "cuStreamCreate");
-  return Stream;
-}
-
-Error CUDAPlatformDevice::destroyStream(const void *Handle) {
-  return CUresultToError(
-      cuStreamDestroy(static_cast<CUstream>(const_cast<void *>(Handle))),
-      "cuStreamDestroy");
-}
-
-Error CUDAPlatformDevice::launch(
-    const void *PlatformStreamHandle, BlockDimensions BlockSize,
-    GridDimensions GridSize, const void *PKernelHandle,
-    const PackedKernelArgumentArrayBase &ArgumentArray) {
-  CUfunction Function =
-      reinterpret_cast<CUfunction>(const_cast<void *>(PKernelHandle));
-  CUstream Stream =
-      reinterpret_cast<CUstream>(const_cast<void *>(PlatformStreamHandle));
-
-  auto Launch = [Function, Stream, BlockSize,
-                 GridSize](size_t SharedMemoryBytes, void **ArgumentAddresses) {
-    return CUresultToError(
-        cuLaunchKernel(Function,                              //
-                       GridSize.X, GridSize.Y, GridSize.Z,    //
-                       BlockSize.X, BlockSize.Y, BlockSize.Z, //
-                       SharedMemoryBytes, Stream, ArgumentAddresses, nullptr),
-        "cuLaunchKernel");
-  };
-
-  void **ArgumentAddresses = const_cast<void **>(ArgumentArray.getAddresses());
-  size_t SharedArgumentCount = ArgumentArray.getSharedCount();
-  if (SharedArgumentCount) {
-    // The argument handling in this case is not very efficient. We may need to
-    // come back and optimize it later.
-    //
-    // Perhaps introduce another branch for the case where there is exactly one
-    // shared memory argument and it is the first one. This is the only case
-    // that will be used for compiler-generated CUDA kernels, and OpenCL users
-    // can choose to take advantage of it by combining their dynamic shared
-    // memory arguments and putting them first in the kernel signature.
-    unsigned SharedMemoryBytes = 0;
-    size_t ArgumentCount = ArgumentArray.getArgumentCount();
-    llvm::SmallVector<void *, 16> NonSharedArgumentAddresses(
-        ArgumentCount - SharedArgumentCount);
-    size_t NonSharedIndex = 0;
-    for (size_t I = 0; I < ArgumentCount; ++I)
-      if (ArgumentArray.getType(I) == KernelArgumentType::SHARED_DEVICE_MEMORY)
-        SharedMemoryBytes += ArgumentArray.getSize(I);
-      else
-        NonSharedArgumentAddresses[NonSharedIndex++] = ArgumentAddresses[I];
-    return Launch(SharedMemoryBytes, NonSharedArgumentAddresses.data());
-  }
-  return Launch(0, ArgumentAddresses);
-}
-
-Error CUDAPlatformDevice::copyD2H(const void *PlatformStreamHandle,
-                                  const void *DeviceSrcHandle,
-                                  size_t SrcByteOffset, void *HostDst,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyDtoHAsync(
-          offset(HostDst, DstByteOffset),
-          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)),
-          ByteCount,
-          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))),
-      "cuMemcpyDtoHAsync");
-}
-
-Error CUDAPlatformDevice::copyH2D(const void *PlatformStreamHandle,
-                                  const void *HostSrc, size_t SrcByteOffset,
-                                  const void *DeviceDstHandle,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyHtoDAsync(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          offset(HostSrc, SrcByteOffset), ByteCount,
-          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))),
-      "cuMemcpyHtoDAsync");
-}
-
-Error CUDAPlatformDevice::copyD2D(const void *PlatformStreamHandle,
-                                  const void *DeviceSrcHandle,
-                                  size_t SrcByteOffset,
-                                  const void *DeviceDstHandle,
-                                  size_t DstByteOffset, size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyDtoDAsync(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)),
-          ByteCount,
-          static_cast<CUstream>(const_cast<void *>(PlatformStreamHandle))),
-      "cuMemcpyDtoDAsync");
-}
-
-Error CUDAPlatformDevice::blockHostUntilDone(const void *PlatformStreamHandle) {
-  return CUresultToError(cuStreamSynchronize(static_cast<CUstream>(
-                             const_cast<void *>(PlatformStreamHandle))),
-                         "cuStreamSynchronize");
-}
-
-Expected<void *> CUDAPlatformDevice::allocateDeviceMemory(size_t ByteCount) {
-  CUdeviceptr Pointer;
-  if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
-    return CUresultToError(Result, "cuMemAlloc");
-  return reinterpret_cast<void *>(Pointer);
-}
-
-Error CUDAPlatformDevice::freeDeviceMemory(const void *Handle) {
-  return CUresultToError(cuMemFree(reinterpret_cast<CUdeviceptr>(Handle)),
-                         "cuMemFree");
-}
-
-Error CUDAPlatformDevice::registerHostMemory(void *Memory, size_t ByteCount) {
-  return CUresultToError(cuMemHostRegister(Memory, ByteCount, 0u),
-                         "cuMemHostRegister");
-}
-
-Error CUDAPlatformDevice::unregisterHostMemory(const void *Memory) {
-  return CUresultToError(cuMemHostUnregister(const_cast<void *>(Memory)),
-                         "cuMemHostUnregister");
-}
-
-Error CUDAPlatformDevice::synchronousCopyD2H(const void *DeviceSrcHandle,
-                                             size_t SrcByteOffset,
-                                             void *HostDst,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  return CUresultToError(cuMemcpyDtoH(offset(HostDst, DstByteOffset),
-                                      reinterpret_cast<CUdeviceptr>(offset(
-                                          DeviceSrcHandle, SrcByteOffset)),
-                                      ByteCount),
-                         "cuMemcpyDtoH");
-}
-
-Error CUDAPlatformDevice::synchronousCopyH2D(const void *HostSrc,
-                                             size_t SrcByteOffset,
-                                             const void *DeviceDstHandle,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyHtoD(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          offset(HostSrc, SrcByteOffset), ByteCount),
-      "cuMemcpyHtoD");
-}
-
-// Synchronously copies ByteCount bytes from (DeviceSrcHandle + SrcByteOffset)
-// to (DeviceDstHandle + DstByteOffset) using cuMemcpyDtoD.
-//
-// The parameters are listed source first, then destination, matching the
-// asynchronous copyD2D in this file. Naming them in the opposite order would
-// still compile, because the types are identical, but would copy in the wrong
-// direction for callers that pass the source first.
-Error CUDAPlatformDevice::synchronousCopyD2D(const void *DeviceSrcHandle,
-                                             size_t SrcByteOffset,
-                                             const void *DeviceDstHandle,
-                                             size_t DstByteOffset,
-                                             size_t ByteCount) {
-  return CUresultToError(
-      cuMemcpyDtoD(
-          reinterpret_cast<CUdeviceptr>(offset(DeviceDstHandle, DstByteOffset)),
-          reinterpret_cast<CUdeviceptr>(offset(DeviceSrcHandle, SrcByteOffset)),
-          ByteCount),
-      "cuMemcpyDtoD");
-}
-
-} // namespace cuda
-} // namespace streamexecutor
diff --git a/parallel-libs/streamexecutor/tools/streamexecutor-config/CMakeLists.txt b/parallel-libs/streamexecutor/tools/streamexecutor-config/CMakeLists.txt
deleted file mode 100644
index 7c0e5b001ea..00000000000
--- a/parallel-libs/streamexecutor/tools/streamexecutor-config/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-find_package(PythonInterp REQUIRED)
-configure_file(streamexecutor-config.in streamexecutor-config)
-install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/streamexecutor-config DESTINATION bin)
diff --git a/parallel-libs/streamexecutor/tools/streamexecutor-config/streamexecutor-config.in b/parallel-libs/streamexecutor/tools/streamexecutor-config/streamexecutor-config.in
deleted file mode 100755
index f3e15305d27..00000000000
--- a/parallel-libs/streamexecutor/tools/streamexecutor-config/streamexecutor-config.in
+++ /dev/null
@@ -1,231 +0,0 @@
-#!@PYTHON_EXECUTABLE@
-#
-#===- streamexecutor-config - Build config script for SE -----*- python -*--===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-r"""
-Get configuration info needed to compile programs which use StreamExecutor.
-
-Runs llvm-config and adds StreamExecutor-specific flags to the output. Supports
-only the subset of llvm-config flags that are relevant for applications
-compiling against StreamExecutor.
-
-This utility will typically be used to construct a compile command line for an
-application which depends on the StreamExecutor library.
-
-For example:
-  c++ example.cpp -o example \
-      $(streamexecutor-config \
-          --cppflags --cxxflags --ldflags --libs --system-libs)
-"""
-
-import argparse
-import errno
-import os
-import shlex
-import subprocess
-import sys
-
-# The following functions are configured by cmake. They use raw triple-quoted
-# strings to surround values that are substituted by cmake at configure time.
-# This kind of quoting should allow for paths that contain spaces.
-
-def get_llvm_config_dir():
-  """Gets the path to the llvm-config executable."""
-  return r"""@LLVM_BINARY_DIR@/bin"""
-
-def get_cmake_install_prefix():
-  """Gets the value of the cmake variable CMAKE_INSTALL_PREFIX."""
-  return r"""@CMAKE_INSTALL_PREFIX@"""
-
-def convert_library_name(library_name):
-  """Converts a library name ending in '.framework' into a '-framework' flag.
-
-  This is used to support OS X.
-
-  >>> convert_library_name('')
-  ''
-
-  >>> convert_library_name('/usr/local/lib64/libcuda.so')
-  '/usr/local/lib64/libcuda.so'
-
-  >>> convert_library_name('/Library/Frameworks/cuda.framework')
-  '-framework cuda'
-  """
-  framework_suffix = '.framework'
-  if library_name.endswith(framework_suffix):
-    framework_name = os.path.basename(library_name)[:-len(framework_suffix)]
-    library_name = '-framework ' + framework_name
-  return library_name
-
-def get_cuda_driver_library():
-  """Gets the value of the cmake variable CUDA_DRIVER_LIBRARY."""
-  return convert_library_name(r"""@CUDA_DRIVER_LIBRARY@""")
-
-def cuddle_flag(flag, tokens):
-  """If flag appears by itself in tokens, combines it with the next token.
-
-  >>> tokens = ['-I', '/usr/include']
-  >>> cuddle_flag('-I', tokens)
-  >>> tokens
-  ['-I/usr/include']
-
-  >>> tokens = ['-L', '/usr/lib']
-  >>> cuddle_flag('-L', tokens)
-  >>> tokens
-  ['-L/usr/lib']
-
-  >>> tokens = ['-I']
-  >>> cuddle_flag('-I', tokens)
-  >>> tokens
-  ['-I']
-
-  >>> tokens = ['-I', '/usr/include', '-I', '/usr/local/include']
-  >>> cuddle_flag('-I', tokens)
-  >>> tokens
-  ['-I/usr/include', '-I/usr/local/include']
-  """
-  start = 0
-  while True:
-    try:
-      index = tokens.index(flag, start)
-    except ValueError:
-      return
-    if index + 1 < len(tokens):
-      follower = tokens.pop(index + 1)
-      tokens[index] = flag + follower
-    start = index + 1
-
-def get_llvm_config_output_for_dir(llvm_config_dir, flags_string):
-  """Calls llvm-config at the given path and returns the output with -I and -L
-  flags cuddled."""
-  output = subprocess.check_output(
-      ['%s/llvm-config' % llvm_config_dir] + flags_string.split()).strip()
-  tokens = shlex.split(output)
-  cuddle_flag('-I', tokens)
-  cuddle_flag('-L', tokens)
-  return ' '.join(tokens)
-
-def has_token(token, string):
-  """Checks if the given token appears in the string.
-
-  The token argument must be a single shell token.
-
-  >>> string = '-I/usr/include -L"/usr/lib"'
-  >>> has_token('-I/usr/include', string)
-  True
-  >>> has_token('-I/usr/local/include', string)
-  False
-  >>> has_token('-I"/usr/include"', string)
-  True
-  >>> has_token('-L"/usr/lib"', string)
-  True
-  >>> has_token('-L/usr/lib', string)
-  True
-  """
-  split_token = shlex.split(token)
-  if len(split_token) > 1:
-    raise ValueError('has_token called with a multi-token token: ' + token)
-  escaped_token = split_token[0]
-  return escaped_token in shlex.split(string)
-
-def main():
-  parser = argparse.ArgumentParser(
-      prog='streamexecutor-config',
-      formatter_class=argparse.RawDescriptionHelpFormatter,
-      description=__doc__)
-
-  parser.add_argument(
-      '--cppflags',
-      action='store_true',
-      help=
-        'C preprocessor flags for files that include StreamExecutor headers.')
-
-  parser.add_argument(
-      '--cxxflags',
-      action='store_true',
-      help='C++ compiler flags for files that include StreamExecutor headers.')
-
-  parser.add_argument(
-      '--ldflags',
-      action='store_true',
-      help='Print linker flags.')
-
-  parser.add_argument(
-      '--libs',
-      action='store_true',
-      help='Libraries needed to link against StreamExecutor.')
-
-  parser.add_argument(
-      '--system-libs',
-      action='store_true',
-      help='System libraries needed to link against StreamExecutor.')
-
-  parser.add_argument(
-      '--llvm-config-dir',
-      default=get_llvm_config_dir(),
-      help='Directory containing the llvm-config executable. '\
-          'If not specified, defaults to the cmake-configured location')
-
-  args = parser.parse_args()
-
-  # Print the help message if the user did not pass any flag arguments.
-  if not any(
-      getattr(args, flag)
-        for flag in ('cppflags', 'cxxflags', 'ldflags', 'libs', 'system_libs')):
-    parser.print_help()
-    sys.exit(1)
-
-  # Check for the presence of the llvm-config executable.
-  if not os.path.isfile('%s/llvm-config' % args.llvm_config_dir):
-    sys.exit('llvm-config not found in: ' + args.llvm_config_dir)
-  if not os.access('%s/llvm-config' % args.llvm_config_dir, os.X_OK):
-    sys.exit('llvm-config not executable in: ' + args.llvm_config_dir)
-
-  # We will always use args.llvm_config_dir as the second argument to
-  # get_llvm_config_output_for_path.
-  get_llvm_config_output = lambda flags : get_llvm_config_output_for_dir(
-      args.llvm_config_dir, flags)
-
-  all_flags = []
-
-  if args.cppflags:
-    llvm_flags = get_llvm_config_output('--cppflags')
-    all_flags.append(llvm_flags)
-    se_flag = "-I%s/include" % get_cmake_install_prefix()
-    if not has_token(token=se_flag, string=llvm_flags):
-      all_flags.append(se_flag)
-
-  if args.cxxflags:
-    all_flags.append(get_llvm_config_output('--cxxflags'))
-
-  if args.ldflags:
-    llvm_flags = get_llvm_config_output('--ldflags')
-    all_flags.append(llvm_flags)
-    se_flag = "-L%s/lib" % get_cmake_install_prefix()
-    if not has_token(token=se_flag, string=llvm_flags):
-      all_flags.append(se_flag)
-
-  if args.libs:
-    llvm_flags = get_llvm_config_output('--libs support symbolize')
-    se_flag = '-lstreamexecutor'
-    if not has_token(token=se_flag, string=llvm_flags):
-      all_flags.append(se_flag)
-    cuda_driver_library = get_cuda_driver_library()
-    if cuda_driver_library:
-      all_flags.append(cuda_driver_library)
-    all_flags.append(llvm_flags)
-
-  if args.system_libs:
-    all_flags.append(get_llvm_config_output('--system-libs'))
-
-  print(' '.join(all_flags))
-
-if __name__ == '__main__':
-  main()
diff --git a/parallel-libs/streamexecutor/unittests/CMakeLists.txt b/parallel-libs/streamexecutor/unittests/CMakeLists.txt
deleted file mode 100644
index 3a8142273a5..00000000000
--- a/parallel-libs/streamexecutor/unittests/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-add_custom_target(StreamExecutorUnitTests)
-set_target_properties(StreamExecutorUnitTests PROPERTIES FOLDER "streamexecutor tests")
-
-function(add_se_unittest testdir_name)
-  add_unittest(StreamExecutorUnitTests ${testdir_name} ${ARGN})
-  target_link_libraries(${testdir_name} streamexecutor)
-endfunction()
-
-add_subdirectory(CoreTests)
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt b/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt
deleted file mode 100644
index 7a70ca88245..00000000000
--- a/parallel-libs/streamexecutor/unittests/CoreTests/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(STREAM_EXECUTOR_ENABLE_CUDA_PLATFORM)
-    set(CUDA_TEST_SOURCES cuda/CUDATest.cpp)
-endif()
-
-add_se_unittest(
-  StreamExecutorCoreTests
-  DeviceTest.cpp
-  KernelSpecTest.cpp
-  PackedKernelArgumentArrayTest.cpp
-  StreamTest.cpp
-  ${CUDA_TEST_SOURCES}
-)
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp
deleted file mode 100644
index ab111e52a5a..00000000000
--- a/parallel-libs/streamexecutor/unittests/CoreTests/DeviceTest.cpp
+++ /dev/null
@@ -1,378 +0,0 @@
-//===-- DeviceTest.cpp - Tests for Device ---------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the unit tests for Device code.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cstdlib>
-#include <cstring>
-
-#include "streamexecutor/Device.h"
-#include "streamexecutor/PlatformDevice.h"
-#include "streamexecutor/platforms/host/HostPlatformDevice.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-namespace se = ::streamexecutor;
-
-const auto &getDeviceValue = se::host::HostPlatformDevice::getDeviceValue<int>;
-
-/// Test fixture to hold objects used by tests.
-class DeviceTest : public ::testing::Test {
-public:
-  DeviceTest()
-      : Device(&PDevice), HostA5{0, 1, 2, 3, 4}, HostB5{5, 6, 7, 8, 9},
-        HostA7{10, 11, 12, 13, 14, 15, 16}, HostB7{17, 18, 19, 20, 21, 22, 23},
-        DeviceA5(getOrDie(Device.allocateDeviceMemory<int>(5))),
-        DeviceB5(getOrDie(Device.allocateDeviceMemory<int>(5))),
-        DeviceA7(getOrDie(Device.allocateDeviceMemory<int>(7))),
-        DeviceB7(getOrDie(Device.allocateDeviceMemory<int>(7))),
-        Host5{24, 25, 26, 27, 28}, Host7{29, 30, 31, 32, 33, 34, 35} {
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostA5, DeviceA5));
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostB5, DeviceB5));
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostA7, DeviceA7));
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostB7, DeviceB7));
-  }
-
-  se::host::HostPlatformDevice PDevice;
-  se::Device Device;
-
-  // Device memory is backed by host arrays.
-  int HostA5[5];
-  int HostB5[5];
-  int HostA7[7];
-  int HostB7[7];
-  se::GlobalDeviceMemory<int> DeviceA5;
-  se::GlobalDeviceMemory<int> DeviceB5;
-  se::GlobalDeviceMemory<int> DeviceA7;
-  se::GlobalDeviceMemory<int> DeviceB7;
-
-  // Host memory to be used as actual host memory.
-  int Host5[5];
-  int Host7[7];
-};
-
-#define EXPECT_NO_ERROR(E) EXPECT_FALSE(static_cast<bool>(E))
-#define EXPECT_ERROR(E)                                                        \
-  do {                                                                         \
-    se::Error E__ = E;                                                         \
-    EXPECT_TRUE(static_cast<bool>(E__));                                       \
-    consumeError(std::move(E__));                                              \
-  } while (false)
-
-using llvm::ArrayRef;
-using llvm::MutableArrayRef;
-
-TEST_F(DeviceTest, GetName) { EXPECT_EQ(Device.getName(), "host"); }
-
-TEST_F(DeviceTest, AllocateAndFreeDeviceMemory) {
-  se::Expected<se::GlobalDeviceMemory<int>> MaybeMemory =
-      Device.allocateDeviceMemory<int>(10);
-  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
-}
-
-TEST_F(DeviceTest, RegisterAndUnregisterHostMemory) {
-  std::vector<int> Data(10);
-  se::Expected<se::RegisteredHostMemory<int>> MaybeMemory =
-      Device.registerHostMemory<int>(Data);
-  EXPECT_TRUE(static_cast<bool>(MaybeMemory));
-}
-
-// D2H tests
-
-TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRefByCount) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(HostA5[I], Host5[I]);
-
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2H(DeviceB5, MutableArrayRef<int>(Host5), 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(HostB5[I], Host5[I]);
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5), 7));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7), 7));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5), 7));
-}
-
-TEST_F(DeviceTest, SyncCopyD2HToMutableArrayRef) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host5)));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(HostA5[I], Host5[I]);
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2H(DeviceA7, MutableArrayRef<int>(Host5)));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2H(DeviceA5, MutableArrayRef<int>(Host7)));
-}
-
-TEST_F(DeviceTest, SyncCopyD2HToPointer) {
-  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA5, Host5, 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(HostA5[I], Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5, Host7, 7));
-}
-
-TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRefByCount) {
-  EXPECT_NO_ERROR(Device.synchronousCopyD2H(
-      DeviceA5.asSlice().slice(1), MutableArrayRef<int>(Host5 + 1, 4), 4));
-  for (int I = 1; I < 5; ++I)
-    EXPECT_EQ(HostA5[I], Host5[I]);
-
-  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceB5.asSlice().drop_back(1),
-                                            MutableArrayRef<int>(Host5), 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(HostB5[I], Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice(),
-                                         MutableArrayRef<int>(Host5), 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
-                                         MutableArrayRef<int>(Host7), 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
-                                         MutableArrayRef<int>(Host5), 7));
-}
-
-TEST_F(DeviceTest, SyncCopyD2HSliceToMutableArrayRef) {
-  EXPECT_NO_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().slice(1, 5),
-                                            MutableArrayRef<int>(Host5)));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(HostA7[I + 1], Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA7.asSlice().drop_back(1),
-                                         MutableArrayRef<int>(Host5)));
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(),
-                                         MutableArrayRef<int>(Host7)));
-}
-
-TEST_F(DeviceTest, SyncCopyD2HSliceToPointer) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2H(DeviceA5.asSlice().slice(1), Host5 + 1, 4));
-  for (int I = 1; I < 5; ++I)
-    EXPECT_EQ(HostA5[I], Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyD2H(DeviceA5.asSlice(), Host7, 7));
-}
-
-// H2D tests
-
-TEST_F(DeviceTest, SyncCopyH2DToArrayRefByCount) {
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceB5, 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5, 7));
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7, 7));
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5, 7));
-}
-
-TEST_F(DeviceTest, SyncCopyH2DToArrayRef) {
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7));
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5));
-}
-
-TEST_F(DeviceTest, SyncCopyH2DToPointer) {
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5, 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5, 7));
-}
-
-TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRefByCount) {
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(ArrayRef<int>(Host5 + 1, 4),
-                                            DeviceA5.asSlice().slice(1), 4));
-  for (int I = 1; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(
-      ArrayRef<int>(Host5), DeviceB5.asSlice().drop_back(1), 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]);
-
-  EXPECT_ERROR(
-      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice(), 7));
-}
-
-TEST_F(DeviceTest, SyncCopyH2DSliceToArrayRef) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA5.asSlice()));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-
-  EXPECT_ERROR(
-      Device.synchronousCopyH2D(ArrayRef<int>(Host5), DeviceA7.asSlice()));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyH2D(ArrayRef<int>(Host7), DeviceA5.asSlice()));
-}
-
-TEST_F(DeviceTest, SyncCopyH2DSliceToPointer) {
-  EXPECT_NO_ERROR(Device.synchronousCopyH2D(Host5, DeviceA5.asSlice(), 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-
-  EXPECT_ERROR(Device.synchronousCopyH2D(Host7, DeviceA5.asSlice(), 7));
-}
-
-// D2D tests
-
-TEST_F(DeviceTest, SyncCopyD2DByCount) {
-  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-
-  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB7, 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5, 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5, 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7, 7));
-}
-
-TEST_F(DeviceTest, SyncCopyD2D) {
-  EXPECT_NO_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7));
-}
-
-TEST_F(DeviceTest, SyncCopySliceD2DByCount) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice().slice(1), DeviceB5, 4));
-  for (int I = 0; I < 4; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I + 1), getDeviceValue(DeviceB5, I));
-
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5, 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5, 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7, 7));
-}
-
-TEST_F(DeviceTest, SyncCopySliceD2D) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB5, I));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA7.asSlice().slice(1), DeviceB5));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7));
-}
-
-TEST_F(DeviceTest, SyncCopyD2DSliceByCount) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().slice(2), 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I + 2));
-
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice(), 7));
-}
-
-TEST_F(DeviceTest, SyncCopyD2DSlice) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2)));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA7, DeviceB5.asSlice()));
-
-  EXPECT_ERROR(Device.synchronousCopyD2D(DeviceA5, DeviceB7.asSlice()));
-}
-
-TEST_F(DeviceTest, SyncCopySliceD2DSliceByCount) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2));
-  for (int I = 0; I < 2; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice(), 7));
-}
-
-TEST_F(DeviceTest, SyncCopySliceD2DSlice) {
-  EXPECT_NO_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice()));
-  for (int I = 0; I < 5; ++I)
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice()));
-
-  EXPECT_ERROR(
-      Device.synchronousCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice()));
-}
-
-} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
deleted file mode 100644
index 486a3504091..00000000000
--- a/parallel-libs/streamexecutor/unittests/CoreTests/KernelSpecTest.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===-- KernelSpecTest.cpp - Tests for KernelSpec -------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the unit tests for the code in KernelSpec.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/KernelSpec.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-namespace se = ::streamexecutor;
-
-TEST(CUDAPTXInMemorySpec, NoCode) {
-  se::CUDAPTXInMemorySpec Spec("KernelName", {});
-  EXPECT_EQ("KernelName", Spec.getKernelName());
-  EXPECT_EQ(nullptr, Spec.getCode(1, 0));
-}
-
-TEST(CUDAPTXInMemorySpec, SingleComputeCapability) {
-  const char *PTXCodeString = "Dummy PTX code";
-  se::CUDAPTXInMemorySpec Spec("KernelName", {{{1, 0}, PTXCodeString}});
-  EXPECT_EQ("KernelName", Spec.getKernelName());
-  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
-  EXPECT_EQ(PTXCodeString, Spec.getCode(1, 0));
-  EXPECT_EQ(PTXCodeString, Spec.getCode(2, 0));
-}
-
-TEST(CUDAPTXInMemorySpec, TwoComputeCapabilities) {
-  const char *PTXCodeString10 = "Dummy PTX code 10";
-  const char *PTXCodeString30 = "Dummy PTX code 30";
-  se::CUDAPTXInMemorySpec Spec(
-      "KernelName", {{{1, 0}, PTXCodeString10}, {{3, 0}, PTXCodeString30}});
-  EXPECT_EQ("KernelName", Spec.getKernelName());
-  EXPECT_EQ(nullptr, Spec.getCode(0, 5));
-  EXPECT_EQ(PTXCodeString10, Spec.getCode(1, 0));
-  EXPECT_EQ(PTXCodeString30, Spec.getCode(3, 0));
-  EXPECT_EQ(PTXCodeString10, Spec.getCode(2, 0));
-}
-
-TEST(CUDAFatbinInMemorySpec, BasicUsage) {
-  const char *FatbinBytes = "Dummy fatbin bytes";
-  se::CUDAFatbinInMemorySpec Spec("KernelName", FatbinBytes);
-  EXPECT_EQ("KernelName", Spec.getKernelName());
-  EXPECT_EQ(FatbinBytes, Spec.getBytes());
-}
-
-TEST(OpenCLTextInMemorySpec, BasicUsage) {
-  const char *OpenCLText = "Dummy OpenCL text";
-  se::OpenCLTextInMemorySpec Spec("KernelName", OpenCLText);
-  EXPECT_EQ("KernelName", Spec.getKernelName());
-  EXPECT_EQ(OpenCLText, Spec.getText());
-}
-
-TEST(MultiKernelLoaderSpec, NoCode) {
-  se::MultiKernelLoaderSpec MultiSpec;
-  EXPECT_FALSE(MultiSpec.hasCUDAPTXInMemory());
-  EXPECT_FALSE(MultiSpec.hasCUDAFatbinInMemory());
-  EXPECT_FALSE(MultiSpec.hasOpenCLTextInMemory());
-
-  EXPECT_DEBUG_DEATH(MultiSpec.getCUDAPTXInMemory(),
-                     "getting spec that is not present");
-  EXPECT_DEBUG_DEATH(MultiSpec.getCUDAFatbinInMemory(),
-                     "getting spec that is not present");
-  EXPECT_DEBUG_DEATH(MultiSpec.getOpenCLTextInMemory(),
-                     "getting spec that is not present");
-}
-
-TEST(MultiKernelLoaderSpec, Registration) {
-  se::MultiKernelLoaderSpec MultiSpec;
-  const char *KernelName = "KernelName";
-  const char *PTXCodeString = "Dummy PTX code";
-  const char *FatbinBytes = "Dummy fatbin bytes";
-  const char *OpenCLText = "Dummy OpenCL text";
-
-  MultiSpec.addCUDAPTXInMemory(KernelName, {{{1, 0}, PTXCodeString}})
-      .addCUDAFatbinInMemory(KernelName, FatbinBytes)
-      .addOpenCLTextInMemory(KernelName, OpenCLText);
-
-  EXPECT_TRUE(MultiSpec.hasCUDAPTXInMemory());
-  EXPECT_TRUE(MultiSpec.hasCUDAFatbinInMemory());
-  EXPECT_TRUE(MultiSpec.hasOpenCLTextInMemory());
-
-  EXPECT_EQ(KernelName, MultiSpec.getCUDAPTXInMemory().getKernelName());
-  EXPECT_EQ(nullptr, MultiSpec.getCUDAPTXInMemory().getCode(0, 5));
-  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(1, 0));
-  EXPECT_EQ(PTXCodeString, MultiSpec.getCUDAPTXInMemory().getCode(2, 0));
-
-  EXPECT_EQ(KernelName, MultiSpec.getCUDAFatbinInMemory().getKernelName());
-  EXPECT_EQ(FatbinBytes, MultiSpec.getCUDAFatbinInMemory().getBytes());
-
-  EXPECT_EQ(KernelName, MultiSpec.getOpenCLTextInMemory().getKernelName());
-  EXPECT_EQ(OpenCLText, MultiSpec.getOpenCLTextInMemory().getText());
-}
-
-TEST(MultiKernelLoaderSpec, RegisterTwice) {
-  se::MultiKernelLoaderSpec MultiSpec;
-  const char *KernelName = "KernelName";
-  const char *FatbinBytes = "Dummy fatbin bytes";
-
-  MultiSpec.addCUDAFatbinInMemory(KernelName, FatbinBytes);
-
-  EXPECT_DEBUG_DEATH(MultiSpec.addCUDAFatbinInMemory(KernelName, FatbinBytes),
-                     "illegal loader spec overwrite");
-}
-
-TEST(MultiKernelLoaderSpec, ConflictingKernelNames) {
-  se::MultiKernelLoaderSpec MultiSpec;
-  const char *KernelNameA = "KernelName";
-  std::string KernelNameB = KernelNameA;
-  const char *PTXCodeString = "Dummy PTX code";
-  const char *FatbinBytes = "Dummy fatbin bytes";
-
-  // Check that names don't conflict if they are equivalent strings in different
-  // locations.
-  MultiSpec.addCUDAPTXInMemory(KernelNameA, {{{1, 0}, PTXCodeString}})
-      .addCUDAFatbinInMemory(KernelNameB, FatbinBytes);
-
-  const char *OtherKernelName = "OtherKernelName";
-  const char *OpenCLText = "Dummy OpenCL text";
-  EXPECT_DEBUG_DEATH(
-      MultiSpec.addOpenCLTextInMemory(OtherKernelName, OpenCLText),
-      "different kernel names in one MultiKernelLoaderSpec");
-}
-
-} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp
deleted file mode 100644
index 860f21c323a..00000000000
--- a/parallel-libs/streamexecutor/unittests/CoreTests/PackedKernelArgumentArrayTest.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===-- PackedKernelArgumentArrayTest.cpp - tests for kernel arg packing --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Unit tests for kernel argument packing.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/Device.h"
-#include "streamexecutor/DeviceMemory.h"
-#include "streamexecutor/PackedKernelArgumentArray.h"
-#include "streamexecutor/PlatformDevice.h"
-#include "streamexecutor/platforms/host/HostPlatformDevice.h"
-
-#include "llvm/ADT/Twine.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-namespace se = ::streamexecutor;
-
-using Type = se::KernelArgumentType;
-
-// Test fixture class for testing argument packing.
-//
-// Basically defines a bunch of types to be packed so they don't have to be
-// defined separately in each test.
-class DeviceMemoryPackingTest : public ::testing::Test {
-public:
-  DeviceMemoryPackingTest()
-      : Device(&PDevice), Value(42), Handle(&Value), ByteCount(15),
-        ElementCount(5),
-        TypedGlobal(getOrDie(Device.allocateDeviceMemory<int>(ElementCount))),
-        TypedShared(
-            se::SharedDeviceMemory<int>::makeFromElementCount(ElementCount)) {}
-
-  se::host::HostPlatformDevice PDevice;
-  se::Device Device;
-  int Value;
-  void *Handle;
-  size_t ByteCount;
-  size_t ElementCount;
-  se::GlobalDeviceMemory<int> TypedGlobal;
-  se::SharedDeviceMemory<int> TypedShared;
-};
-
-// Utility method to check the expected address, size, and type for a packed
-// argument at the given index of a PackedKernelArgumentArray.
-template <typename... ParameterTs>
-static void
-ExpectEqual(const void *ExpectedAddress, size_t ExpectedSize, Type ExpectedType,
-            const se::PackedKernelArgumentArray<ParameterTs...> &Observed,
-            size_t Index) {
-  SCOPED_TRACE(("Index = " + llvm::Twine(Index)).str());
-  EXPECT_EQ(ExpectedAddress, Observed.getAddress(Index));
-  EXPECT_EQ(ExpectedAddress, Observed.getAddresses()[Index]);
-  EXPECT_EQ(ExpectedSize, Observed.getSize(Index));
-  EXPECT_EQ(ExpectedSize, Observed.getSizes()[Index]);
-  EXPECT_EQ(ExpectedType, Observed.getType(Index));
-  EXPECT_EQ(ExpectedType, Observed.getTypes()[Index]);
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleValue) {
-  auto Array = se::make_kernel_argument_pack(Value);
-  ExpectEqual(&Value, sizeof(Value), Type::VALUE, Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(0u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleTypedGlobal) {
-  auto Array = se::make_kernel_argument_pack(TypedGlobal);
-  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
-              Type::GLOBAL_DEVICE_MEMORY, Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(0u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleTypedGlobalPointer) {
-  auto Array = se::make_kernel_argument_pack(&TypedGlobal);
-  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
-              Type::GLOBAL_DEVICE_MEMORY, Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(0u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleConstTypedGlobalPointer) {
-  const se::GlobalDeviceMemory<int> *ArgumentPointer = &TypedGlobal;
-  auto Array = se::make_kernel_argument_pack(ArgumentPointer);
-  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
-              Type::GLOBAL_DEVICE_MEMORY, Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(0u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleTypedShared) {
-  auto Array = se::make_kernel_argument_pack(TypedShared);
-  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
-              Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(1u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleTypedSharedPointer) {
-  auto Array = se::make_kernel_argument_pack(&TypedShared);
-  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
-              Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(1u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, SingleConstTypedSharedPointer) {
-  const se::SharedDeviceMemory<int> *ArgumentPointer = &TypedShared;
-  auto Array = se::make_kernel_argument_pack(ArgumentPointer);
-  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
-              Array, 0);
-  EXPECT_EQ(1u, Array.getArgumentCount());
-  EXPECT_EQ(1u, Array.getSharedCount());
-}
-
-TEST_F(DeviceMemoryPackingTest, PackSeveralArguments) {
-  const se::GlobalDeviceMemory<int> *TypedGlobalPointer = &TypedGlobal;
-  const se::SharedDeviceMemory<int> *TypedSharedPointer = &TypedShared;
-  auto Array = se::make_kernel_argument_pack(Value, TypedGlobal, &TypedGlobal,
-                                             TypedGlobalPointer, TypedShared,
-                                             &TypedShared, TypedSharedPointer);
-  ExpectEqual(&Value, sizeof(Value), Type::VALUE, Array, 0);
-  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
-              Type::GLOBAL_DEVICE_MEMORY, Array, 1);
-  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
-              Type::GLOBAL_DEVICE_MEMORY, Array, 2);
-  ExpectEqual(TypedGlobal.getHandleAddress(), sizeof(void *),
-              Type::GLOBAL_DEVICE_MEMORY, Array, 3);
-  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
-              Array, 4);
-  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
-              Array, 5);
-  ExpectEqual(nullptr, TypedShared.getByteCount(), Type::SHARED_DEVICE_MEMORY,
-              Array, 6);
-  EXPECT_EQ(7u, Array.getArgumentCount());
-  EXPECT_EQ(3u, Array.getSharedCount());
-}
-
-} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp
deleted file mode 100644
index c9bbcb952ba..00000000000
--- a/parallel-libs/streamexecutor/unittests/CoreTests/StreamTest.cpp
+++ /dev/null
@@ -1,290 +0,0 @@
-//===-- StreamTest.cpp - Tests for Stream ---------------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the unit tests for Stream code.
-///
-//===----------------------------------------------------------------------===//
-
-#include <cstring>
-
-#include "streamexecutor/Device.h"
-#include "streamexecutor/Kernel.h"
-#include "streamexecutor/KernelSpec.h"
-#include "streamexecutor/PlatformDevice.h"
-#include "streamexecutor/Stream.h"
-#include "streamexecutor/platforms/host/HostPlatformDevice.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-// Short alias for the library namespace used throughout this file.
-namespace se = ::streamexecutor;
-
-// File-local shorthand for HostPlatformDevice::getDeviceValue<int>. The tests
-// call it as getDeviceValue(DeviceMemory, Index) and compare the result with
-// host ints.
-// NOTE(review): presumably returns element Index of the given device
-// allocation on the host platform -- confirm against HostPlatformDevice.h.
-const auto &getDeviceValue = se::host::HostPlatformDevice::getDeviceValue<int>;
-
-/// Test fixture to hold objects used by tests.
-///
-/// The constructor builds a Device and a Stream on top of a
-/// HostPlatformDevice, registers Host5 and Host7 as host memory, allocates
-/// four device buffers (two of five ints, two of seven) and synchronously
-/// copies HostA5, HostB5, HostA7 and HostB7 into the device buffer of the
-/// matching name. Every test therefore starts with DeviceA5/B5/A7/B7 loaded
-/// from the host array of the same suffix.
-/// NOTE(review): getOrDie and se::dieIfError presumably terminate the process
-/// on error instead of recording a gtest failure -- confirm against their
-/// definitions.
-class StreamTest : public ::testing::Test {
-public:
-  // Members are initialized in declaration order, not in the order written
-  // here. PDevice (default-constructed) must stay declared before Device and
-  // Stream, which are built from its address, and Host5/Host7 must stay
-  // declared before the RegisteredHost* members that wrap them.
-  StreamTest()
-      : DummyPlatformStream(1), Device(&PDevice),
-        Stream(&PDevice, &DummyPlatformStream), HostA5{0, 1, 2, 3, 4},
-        HostB5{5, 6, 7, 8, 9}, HostA7{10, 11, 12, 13, 14, 15, 16},
-        HostB7{17, 18, 19, 20, 21, 22, 23}, Host5{24, 25, 26, 27, 28},
-        Host7{29, 30, 31, 32, 33, 34, 35},
-        RegisteredHost5(getOrDie(
-            Device.registerHostMemory(llvm::MutableArrayRef<int>(Host5)))),
-        RegisteredHost7(getOrDie(
-            Device.registerHostMemory(llvm::MutableArrayRef<int>(Host7)))),
-        DeviceA5(getOrDie(Device.allocateDeviceMemory<int>(5))),
-        DeviceB5(getOrDie(Device.allocateDeviceMemory<int>(5))),
-        DeviceA7(getOrDie(Device.allocateDeviceMemory<int>(7))),
-        DeviceB7(getOrDie(Device.allocateDeviceMemory<int>(7))) {
-    // Seed each device buffer from the host array with the same suffix.
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostA5, DeviceA5));
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostB5, DeviceB5));
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostA7, DeviceA7));
-    se::dieIfError(Device.synchronousCopyH2D<int>(HostB7, DeviceB7));
-  }
-
-protected:
-  int DummyPlatformStream; // Mimicking a platform where the platform stream
-                           // handle is just a stream number.
-  se::host::HostPlatformDevice PDevice;
-  se::Device Device;
-  se::Stream Stream;
-
-  // Device memory is matched by host arrays.
-  // All six host arrays hold distinct values (0 through 35 overall), so a
-  // copy from the wrong buffer shows up as a value mismatch.
-  int HostA5[5];
-  int HostB5[5];
-  int HostA7[7];
-  int HostB7[7];
-
-  // Host memory to be used as actual host memory.
-  int Host5[5];
-  int Host7[7];
-
-  // Registered handles over Host5 and Host7; the stream copy tests pass these
-  // rather than the raw arrays.
-  se::RegisteredHostMemory<int> RegisteredHost5;
-  se::RegisteredHostMemory<int> RegisteredHost7;
-
-  // Device memory.
-  se::GlobalDeviceMemory<int> DeviceA5;
-  se::GlobalDeviceMemory<int> DeviceB5;
-  se::GlobalDeviceMemory<int> DeviceA7;
-  se::GlobalDeviceMemory<int> DeviceB7;
-};
-
-// D2H tests
-
-// Device-to-host copies into a registered host buffer with an explicit
-// element count.
-TEST_F(StreamTest, CopyD2HToRegisteredRefByCount) {
-  // Full copy: all five elements of DeviceA5 (seeded from HostA5).
-  Stream.thenCopyD2H(DeviceA5, RegisteredHost5, 5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  // Partial copy: only the first two elements are requested and checked.
-  Stream.thenCopyD2H(DeviceB5, RegisteredHost5, 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(HostB5[I], Host5[I]);
-  }
-
-  // Seven elements are requested but the host buffer holds five; the stream
-  // is expected to end up in an error state.
-  Stream.thenCopyD2H(DeviceA7, RegisteredHost5, 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-host copies into a registered host buffer without an explicit
-// element count.
-TEST_F(StreamTest, CopyD2HToRegistered) {
-  // Source and destination both hold five elements.
-  Stream.thenCopyD2H(DeviceA5, RegisteredHost5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  // Five-element source, seven-element destination: expected to be rejected.
-  Stream.thenCopyD2H(DeviceA5, RegisteredHost7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-host copies between slices with an explicit element count.
-TEST_F(StreamTest, CopyD2HSliceToRegiseredSliceByCount) {
-  // Copy four elements from the slice(1) view of DeviceA5 into the
-  // slice(1, 4) view of the host buffer; host elements 1 through 4 are then
-  // compared with HostA5.
-  Stream.thenCopyD2H(DeviceA5.asSlice().slice(1),
-                     RegisteredHost5.asSlice().slice(1, 4), 4);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 1; I != 5; ++I) {
-    EXPECT_EQ(HostA5[I], Host5[I]);
-  }
-
-  // Copy two elements from a drop_back(1) view of DeviceB5 into the whole
-  // registered buffer.
-  Stream.thenCopyD2H(DeviceB5.asSlice().drop_back(1), RegisteredHost5, 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(HostB5[I], Host5[I]);
-  }
-
-  // Seven elements requested from a five-element source: expected to fail.
-  Stream.thenCopyD2H(DeviceA5.asSlice(), RegisteredHost7, 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-host copies from a device slice into a whole registered buffer,
-// without an explicit element count.
-TEST_F(StreamTest, CopyD2HSliceToRegistered) {
-  // The slice(1, 5) view of DeviceA7 is copied into the five-element host
-  // buffer; host element I must equal HostA7[I + 1].
-  Stream.thenCopyD2H(DeviceA7.asSlice().slice(1, 5), RegisteredHost5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(HostA7[I + 1], Host5[I]);
-  }
-
-  // Five-element source, seven-element destination: expected to be rejected.
-  Stream.thenCopyD2H(DeviceA5.asSlice(), RegisteredHost7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// H2D tests
-
-// Host-to-device copies from a registered host buffer with an explicit
-// element count.
-TEST_F(StreamTest, CopyH2DFromRegisterdByCount) {
-  // Full copy of the five-element host buffer into DeviceA5.
-  Stream.thenCopyH2D(RegisteredHost5, DeviceA5, 5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-  }
-
-  // Partial copy: only the first two elements are requested and checked.
-  Stream.thenCopyH2D(RegisteredHost5, DeviceB5, 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]);
-  }
-
-  // Seven elements requested but DeviceA5 holds five: expected to fail.
-  Stream.thenCopyH2D(RegisteredHost7, DeviceA5, 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Host-to-device copies from a registered host buffer without an explicit
-// element count.
-TEST_F(StreamTest, CopyH2DFromRegistered) {
-  // Source and destination both hold five elements.
-  Stream.thenCopyH2D(RegisteredHost5, DeviceA5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-  }
-
-  // Seven-element source, five-element destination: expected to be rejected.
-  Stream.thenCopyH2D(RegisteredHost7, DeviceA5);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Host-to-device copies between slices with an explicit element count.
-TEST_F(StreamTest, CopyH2DFromRegisteredSliceToSlice) {
-  // Copy four elements from the slice(1, 4) view of the host buffer into the
-  // slice(1) view of DeviceA5; device elements 1 through 4 are then compared
-  // with Host5.
-  Stream.thenCopyH2D(RegisteredHost5.asSlice().slice(1, 4),
-                     DeviceA5.asSlice().slice(1), 4);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 1; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-  }
-
-  // Copy two elements into a drop_back(1) view of DeviceB5.
-  Stream.thenCopyH2D(RegisteredHost5, DeviceB5.asSlice().drop_back(1), 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceB5, I), Host5[I]);
-  }
-
-  // Seven elements requested from/into five-element buffers: expected to
-  // fail.
-  Stream.thenCopyH2D(RegisteredHost5, DeviceA5.asSlice(), 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Host-to-device copies from a whole registered buffer into a device slice,
-// without an explicit element count.
-TEST_F(StreamTest, CopyH2DRegisteredToSlice) {
-  // Five-element host buffer into the full-slice view of DeviceA5.
-  Stream.thenCopyH2D(RegisteredHost5, DeviceA5.asSlice());
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), Host5[I]);
-  }
-
-  // Seven-element source, five-element destination: expected to be rejected.
-  Stream.thenCopyH2D(RegisteredHost7, DeviceA5.asSlice());
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// D2D tests
-
-// Device-to-device copies between whole allocations with an explicit element
-// count.
-TEST_F(StreamTest, CopyD2DByCount) {
-  // Full five-element copy from DeviceA5 to DeviceB5.
-  Stream.thenCopyD2D(DeviceA5, DeviceB5, 5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-  }
-
-  // Partial copy: only the first two of seven elements are requested and
-  // checked.
-  Stream.thenCopyD2D(DeviceA7, DeviceB7, 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-  }
-
-  // Seven elements requested but DeviceB5 holds five: expected to fail.
-  Stream.thenCopyD2D(DeviceA7, DeviceB5, 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies between whole allocations without an explicit
-// element count.
-TEST_F(StreamTest, CopyD2D) {
-  // Source and destination both hold five elements.
-  Stream.thenCopyD2D(DeviceA5, DeviceB5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-  }
-
-  // Seven-element source, five-element destination: expected to be rejected.
-  Stream.thenCopyD2D(DeviceA7, DeviceB5);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies from a source slice into a whole allocation, with
-// an explicit element count.
-TEST_F(StreamTest, CopySliceD2DByCount) {
-  // Four elements from the slice(1) view of DeviceA5; destination element I
-  // must equal source element I + 1.
-  Stream.thenCopyD2D(DeviceA5.asSlice().slice(1), DeviceB5, 4);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 4; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I + 1), getDeviceValue(DeviceB5, I));
-  }
-
-  // Two elements from a drop_back(1) view of DeviceA7.
-  Stream.thenCopyD2D(DeviceA7.asSlice().drop_back(1), DeviceB7, 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-  }
-
-  // Seven elements requested from/into five-element buffers: expected to
-  // fail.
-  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5, 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies from a source slice into a whole allocation,
-// without an explicit element count.
-TEST_F(StreamTest, CopySliceD2D) {
-  // A drop_back(2) view of the seven-element DeviceA7 is copied into the
-  // five-element DeviceB5; the first five elements must match.
-  Stream.thenCopyD2D(DeviceA7.asSlice().drop_back(2), DeviceB5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB5, I));
-  }
-
-  // A drop_back(1) view of DeviceA5 into the seven-element DeviceB7 is
-  // expected to be rejected.
-  Stream.thenCopyD2D(DeviceA5.asSlice().drop_back(1), DeviceB7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies from a whole allocation into a destination slice,
-// with an explicit element count.
-TEST_F(StreamTest, CopyD2DSliceByCount) {
-  // Five elements into the slice(2) view of DeviceB7; destination element
-  // I + 2 must equal source element I.
-  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice().slice(2), 5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I + 2));
-  }
-
-  // Two elements into a drop_back(3) view of DeviceB7.
-  Stream.thenCopyD2D(DeviceA7, DeviceB7.asSlice().drop_back(3), 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-  }
-
-  // Seven elements requested but DeviceA5 holds five: expected to fail.
-  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice(), 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies from a whole allocation into a destination slice,
-// without an explicit element count.
-TEST_F(StreamTest, CopyD2DSlice) {
-  // Five-element DeviceA5 into a drop_back(2) view of the seven-element
-  // DeviceB7; the first five elements must match.
-  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice().drop_back(2));
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB7, I));
-  }
-
-  // Five-element source, seven-element destination: expected to be rejected.
-  Stream.thenCopyD2D(DeviceA5, DeviceB7.asSlice());
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies between two slices with an explicit element count.
-TEST_F(StreamTest, CopySliceD2DSliceByCount) {
-  // Full five-element copy between the full-slice views of DeviceA5 and
-  // DeviceB5.
-  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice(), 5);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-  }
-
-  // Partial copy: only the first two of seven elements are requested and
-  // checked.
-  Stream.thenCopyD2D(DeviceA7.asSlice(), DeviceB7.asSlice(), 2);
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 2; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA7, I), getDeviceValue(DeviceB7, I));
-  }
-
-  // Seven elements requested but DeviceB5 holds five: expected to fail.
-  Stream.thenCopyD2D(DeviceA7.asSlice(), DeviceB5.asSlice(), 7);
-  EXPECT_FALSE(Stream.isOK());
-}
-
-// Device-to-device copies between two slices without an explicit element
-// count.
-TEST_F(StreamTest, CopySliceD2DSlice) {
-  // Source and destination slices both cover five elements.
-  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB5.asSlice());
-  EXPECT_TRUE(Stream.isOK());
-  for (int I = 0; I != 5; ++I) {
-    EXPECT_EQ(getDeviceValue(DeviceA5, I), getDeviceValue(DeviceB5, I));
-  }
-
-  // Five-element source, seven-element destination: expected to be rejected.
-  Stream.thenCopyD2D(DeviceA5.asSlice(), DeviceB7.asSlice());
-  EXPECT_FALSE(Stream.isOK());
-}
-
-} // namespace
diff --git a/parallel-libs/streamexecutor/unittests/CoreTests/cuda/CUDATest.cpp b/parallel-libs/streamexecutor/unittests/CoreTests/cuda/CUDATest.cpp
deleted file mode 100644
index caf14368082..00000000000
--- a/parallel-libs/streamexecutor/unittests/CoreTests/cuda/CUDATest.cpp
+++ /dev/null
@@ -1,215 +0,0 @@
-//===-- CUDATest.cpp - Tests for CUDA platform ----------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains the unit tests for CUDA platform code.
-///
-//===----------------------------------------------------------------------===//
-
-#include "streamexecutor/StreamExecutor.h"
-
-#include "gtest/gtest.h"
-
-namespace {
-
-namespace compilergen {
-// Kernel type for saxpy: one float plus two global float buffers, matching
-// the (A, X, Y) parameter list of the PTX entry point in SaxpyPTX.
-using SaxpyKernel =
-    streamexecutor::Kernel<float, streamexecutor::GlobalDeviceMemory<float>,
-                           streamexecutor::GlobalDeviceMemory<float>>;
-
-// PTX source for the "saxpy" entry point. Each thread loads X[tid] and
-// Y[tid], computes A * X[tid] + Y[tid] with a fused multiply-add, and stores
-// the result back into X[tid]; Y is only read.
-//
-// The element index is taken from %tid.x alone (no block index is used), so
-// the kernel only addresses the elements covered by a single block's thread
-// ids. There is no bounds check, so the launch must not use more threads than
-// X and Y have elements.
-const char *SaxpyPTX = R"(
-  .version 4.3
-  .target sm_20
-  .address_size 64
-
-  .visible .entry saxpy(.param .f32 A, .param .u64 X, .param .u64 Y) {
-    .reg .f32 %AValue;
-    .reg .f32 %XValue;
-    .reg .f32 %YValue;
-    .reg .f32 %Result;
-
-    .reg .b64 %XBaseAddrGeneric;
-    .reg .b64 %YBaseAddrGeneric;
-    .reg .b64 %XBaseAddrGlobal;
-    .reg .b64 %YBaseAddrGlobal;
-    .reg .b64 %XAddr;
-    .reg .b64 %YAddr;
-    .reg .b64 %ThreadByteOffset;
-
-    .reg .b32 %TID;
-
-    ld.param.f32 %AValue, [A];
-    ld.param.u64 %XBaseAddrGeneric, [X];
-    ld.param.u64 %YBaseAddrGeneric, [Y];
-    cvta.to.global.u64 %XBaseAddrGlobal, %XBaseAddrGeneric;
-    cvta.to.global.u64 %YBaseAddrGlobal, %YBaseAddrGeneric;
-    mov.u32 %TID, %tid.x;
-    mul.wide.u32 %ThreadByteOffset, %TID, 4;
-    add.s64 %XAddr, %ThreadByteOffset, %XBaseAddrGlobal;
-    add.s64 %YAddr, %ThreadByteOffset, %YBaseAddrGlobal;
-    ld.global.f32 %XValue, [%XAddr];
-    ld.global.f32 %YValue, [%YAddr];
-    fma.rn.f32 %Result, %AValue, %XValue, %YValue;
-    st.global.f32 [%XAddr], %Result;
-    ret;
-  }
-)";
-
-// Loader spec for the saxpy kernel, built once during static initialization.
-// It registers the in-memory PTX held in SaxpyPTX under the kernel name
-// "saxpy".
-// NOTE(review): the {2, 0} pair presumably denotes compute capability 2.0
-// (the PTX targets sm_20) -- confirm against addCUDAPTXInMemory.
-static streamexecutor::MultiKernelLoaderSpec SaxpyLoaderSpec = [] {
-  streamexecutor::MultiKernelLoaderSpec Result;
-  Result.addCUDAPTXInMemory("saxpy", {{{2, 0}, SaxpyPTX}});
-  return Result;
-}();
-
-// Kernel type for SwapPairs: a shared-memory argument, a global int buffer
-// and an int. The PTX entry point in SwapPairsPTX declares only two .param
-// values (InOut, InOutSize) and reaches shared memory through its extern
-// .shared array instead.
-// NOTE(review): the SharedDeviceMemory argument presumably only tells the
-// launch how much dynamic shared memory to reserve -- confirm against the
-// CUDA platform's launch code.
-using SwapPairsKernel =
-    streamexecutor::Kernel<streamexecutor::SharedDeviceMemory<int>,
-                           streamexecutor::GlobalDeviceMemory<int>, int>;
-
-// PTX source for the "SwapPairs" entry point, which swaps each adjacent pair
-// of 32-bit values in InOut using the extern shared array SwapSpace.
-//
-// Per thread:
-//   1. GlobalIndex = ntid.x * ctaid.x + tid.x.
-//   2. Exit early if GlobalIndex >= (InOutSize & -2), i.e. the size rounded
-//      down to an even number, so a trailing unpaired element is untouched.
-//   3. Store InOut[GlobalIndex] into SwapSpace[tid.x], then bar.sync 0.
-//   4. Load SwapSpace[tid.x ^ 1] (the pair partner within the block) and
-//      write it back to InOut[GlobalIndex].
-//
-// SwapSpace is indexed by the block-local thread id, so it needs four bytes
-// of shared memory per thread in the block.
-const char *SwapPairsPTX = R"(
-  .version 4.3
-  .target sm_20
-  .address_size 64
-
-  .extern .shared .align 4 .b8 SwapSpace[];
-
-  .visible .entry SwapPairs(.param .u64 InOut, .param .u32 InOutSize) {
-    .reg .b64 %InOutGeneric;
-    .reg .b32 %InOutSizeValue;
-
-    .reg .b32 %LocalIndex;
-    .reg .b32 %PartnerIndex;
-    .reg .b32 %ThreadsPerBlock;
-    .reg .b32 %BlockIndex;
-    .reg .b32 %GlobalIndex;
-
-    .reg .b32 %GlobalIndexBound;
-    .reg .pred %GlobalIndexTooHigh;
-
-    .reg .b64 %InOutGlobal;
-    .reg .b64 %GlobalByteOffset;
-    .reg .b64 %GlobalAddress;
-
-    .reg .b32 %InitialValue;
-    .reg .b32 %SwappedValue;
-
-    .reg .b64 %SharedBaseAddr;
-    .reg .b64 %LocalWriteByteOffset;
-    .reg .b64 %LocalReadByteOffset;
-    .reg .b64 %SharedWriteAddr;
-    .reg .b64 %SharedReadAddr;
-
-    ld.param.u64 %InOutGeneric, [InOut];
-    ld.param.u32 %InOutSizeValue, [InOutSize];
-    mov.u32 %LocalIndex, %tid.x;
-    mov.u32 %ThreadsPerBlock, %ntid.x;
-    mov.u32 %BlockIndex, %ctaid.x;
-    mad.lo.s32 %GlobalIndex, %ThreadsPerBlock, %BlockIndex, %LocalIndex;
-    and.b32 %GlobalIndexBound, %InOutSizeValue, -2;
-    setp.ge.s32 %GlobalIndexTooHigh, %GlobalIndex, %GlobalIndexBound;
-    @%GlobalIndexTooHigh bra END;
-
-    cvta.to.global.u64 %InOutGlobal, %InOutGeneric;
-    mul.wide.s32 %GlobalByteOffset, %GlobalIndex, 4;
-    add.s64 %GlobalAddress, %InOutGlobal, %GlobalByteOffset;
-    ld.global.u32 %InitialValue, [%GlobalAddress];
-    mul.wide.s32 %LocalWriteByteOffset, %LocalIndex, 4;
-    mov.u64 %SharedBaseAddr, SwapSpace;
-    add.s64 %SharedWriteAddr, %SharedBaseAddr, %LocalWriteByteOffset;
-    st.shared.u32 [%SharedWriteAddr], %InitialValue;
-    bar.sync 0;
-    xor.b32 %PartnerIndex, %LocalIndex, 1;
-    mul.wide.s32 %LocalReadByteOffset, %PartnerIndex, 4;
-    add.s64 %SharedReadAddr, %SharedBaseAddr, %LocalReadByteOffset;
-    ld.shared.u32 %SwappedValue, [%SharedReadAddr];
-    st.global.u32 [%GlobalAddress], %SwappedValue;
-
-  END:
-    ret;
-  }
-)";
-
-// Loader spec for the SwapPairs kernel, built once during static
-// initialization. It registers the in-memory PTX held in SwapPairsPTX under
-// the kernel name "SwapPairs".
-// NOTE(review): the {2, 0} pair presumably denotes compute capability 2.0
-// (the PTX targets sm_20) -- confirm against addCUDAPTXInMemory.
-static streamexecutor::MultiKernelLoaderSpec SwapPairsLoaderSpec = [] {
-  streamexecutor::MultiKernelLoaderSpec Result;
-  Result.addCUDAPTXInMemory("SwapPairs", {{{2, 0}, SwapPairsPTX}});
-  return Result;
-}();
-} // namespace compilergen
-
-// Short aliases used by the fixture and tests in this file: `se` for the
-// library, `cg` for the kernel types and loader specs declared in the
-// compilergen namespace.
-namespace se = ::streamexecutor;
-namespace cg = ::compilergen;
-
-/// Test fixture that looks up the platform registered under the name "CUDA",
-/// takes its device 0 and creates one stream on that device.
-/// NOTE(review): getOrDie presumably terminates the process when a step
-/// fails, so these tests would need the CUDA platform and a device to be
-/// available -- confirm against getOrDie's definition.
-class CUDATest : public ::testing::Test {
-public:
-  CUDATest()
-      : Platform(getOrDie(se::PlatformManager::getPlatformByName("CUDA"))),
-        Device(getOrDie(Platform->getDevice(0))),
-        Stream(getOrDie(Device.createStream())) {}
-
-  // Declaration order matters: members are initialized top to bottom, and
-  // Device is obtained from Platform while Stream is created from Device.
-  se::Platform *Platform;
-  se::Device Device;
-  se::Stream Stream;
-};
-
-// Runs the saxpy kernel on four elements and checks that X was overwritten
-// with Scale * X + Y.
-TEST_F(CUDATest, Saxpy) {
-  float Scale = 42.0f;
-  std::vector<float> HostX = {0, 1, 2, 3};
-  std::vector<float> HostY = {4, 5, 6, 7};
-  size_t ElementCount = HostX.size();
-
-  cg::SaxpyKernel SaxpyFn =
-      getOrDie(Device.createKernel<cg::SaxpyKernel>(cg::SaxpyLoaderSpec));
-
-  // Register both host vectors so the stream copies below can use them.
-  se::RegisteredHostMemory<float> HostXHandle =
-      getOrDie(Device.registerHostMemory<float>(HostX));
-  se::RegisteredHostMemory<float> HostYHandle =
-      getOrDie(Device.registerHostMemory<float>(HostY));
-
-  se::GlobalDeviceMemory<float> DeviceX =
-      getOrDie(Device.allocateDeviceMemory<float>(ElementCount));
-  se::GlobalDeviceMemory<float> DeviceY =
-      getOrDie(Device.allocateDeviceMemory<float>(ElementCount));
-
-  // Upload both inputs, launch the kernel, then download X back into HostX
-  // and block until the stream reports completion.
-  Stream.thenCopyH2D(HostXHandle, DeviceX)
-      .thenCopyH2D(HostYHandle, DeviceY)
-      .thenLaunch(ElementCount, 1, SaxpyFn, Scale, DeviceX, DeviceY)
-      .thenCopyD2H(DeviceX, HostXHandle);
-  se::dieIfError(Stream.blockHostUntilDone());
-
-  // 42 * {0, 1, 2, 3} + {4, 5, 6, 7}.
-  std::vector<float> ExpectedX = {4, 47, 90, 133};
-  EXPECT_EQ(ExpectedX, HostX);
-}
-
-// Runs the SwapPairs kernel on twelve values and checks that every adjacent
-// pair comes back swapped in a separate result buffer.
-TEST_F(CUDATest, DynamicSharedMemory) {
-  std::vector<int> HostPairs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
-  std::vector<int> HostResult(HostPairs.size(), 0);
-  int ElementCount = HostPairs.size();
-
-  cg::SwapPairsKernel SwapFn = getOrDie(
-      Device.createKernel<cg::SwapPairsKernel>(cg::SwapPairsLoaderSpec));
-
-  // Register the input and the (initially zeroed) output vector.
-  se::RegisteredHostMemory<int> PairsHandle =
-      getOrDie(Device.registerHostMemory<int>(HostPairs));
-  se::RegisteredHostMemory<int> ResultHandle =
-      getOrDie(Device.registerHostMemory<int>(HostResult));
-
-  se::GlobalDeviceMemory<int> DevicePairs =
-      getOrDie(Device.allocateDeviceMemory<int>(ElementCount));
-  // Shared-memory argument sized to one int per input element.
-  auto Scratch =
-      se::SharedDeviceMemory<int>::makeFromElementCount(ElementCount);
-
-  // Upload the input, launch the kernel, download into HostResult and block
-  // until the stream reports completion.
-  Stream.thenCopyH2D(PairsHandle, DevicePairs)
-      .thenLaunch(ElementCount, 1, SwapFn, Scratch, DevicePairs,
-                  ElementCount)
-      .thenCopyD2H(DevicePairs, ResultHandle);
-  se::dieIfError(Stream.blockHostUntilDone());
-
-  std::vector<int> ExpectedPairs = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10};
-  EXPECT_EQ(ExpectedPairs, HostResult);
-}
-
-} // namespace
author	Jason Henline <jhen@google.com>	2016-10-25 20:38:08 +0000
committer	Jason Henline <jhen@google.com>	2016-10-25 20:38:08 +0000
commit	b3f709e10f37225ae65c1d48c4623f6abc2cac1e (patch)
tree	935df072bbc2ee7873c138dd75f04a32d96c2249 /parallel-libs
parent	209a77d8d9247b7612025a15f0b4c18bc49e66eb (diff)
download	bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.tar.gz bcm5719-llvm-b3f709e10f37225ae65c1d48c4623f6abc2cac1e.zip