summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--openmp/libomptarget/deviceRTLs/CMakeLists.txt3
-rw-r--r--openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt136
-rw-r--r--openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h31
-rw-r--r--openmp/libomptarget/deviceRTLs/common/debug.h4
-rw-r--r--openmp/libomptarget/deviceRTLs/common/device_environment.h (renamed from openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h)2
-rw-r--r--openmp/libomptarget/deviceRTLs/common/support.h (renamed from openmp/libomptarget/deviceRTLs/nvptx/src/support.h)0
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu2
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h2
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/support.cu2
9 files changed, 162 insertions, 20 deletions
diff --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt
index 9723fb8cde3..8bbf987aaf2 100644
--- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt
@@ -6,8 +6,9 @@
#
# ##===----------------------------------------------------------------------===##
#
-# Build a device RTL for each available machine available.
+# Build a device RTL for each available machine.
#
##===----------------------------------------------------------------------===##
+add_subdirectory(amdgcn)
add_subdirectory(nvptx)
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
new file mode 100644
index 00000000000..6b82b4eccdc
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -0,0 +1,136 @@
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build the AMDGCN Device RTL if the ROCM tools are available
+#
+##===----------------------------------------------------------------------===##
+
+find_package(LLVM QUIET CONFIG
+ PATHS
+ $ENV{AOMP}
+ $ENV{HOME}/rocm/aomp
+ /opt/rocm/aomp
+ /usr/lib/rocm/aomp
+ ${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR}
+ ${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR}
+ ${CMAKE_CXX_COMPILER_DIR}
+ NO_DEFAULT_PATH)
+
+if (LLVM_DIR)
+ libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake")
+else()
+ libomptarget_say("Not building AMDGCN device RTL: AOMP not found")
+ return()
+endif()
+
+set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX})
+
+if (AOMP_INSTALL_PREFIX)
+ set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin)
+else()
+ set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin)
+endif()
+
+libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}")
+
+project(omptarget-amdgcn)
+
+add_custom_target(omptarget-amdgcn ALL)
+
+#optimization level
+set(optimization_level 2)
+
+# Activate RTL message dumps if requested by the user.
+if(LIBOMPTARGET_NVPTX_DEBUG)
+ set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
+endif()
+
+get_filename_component(devicertl_base_directory
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ DIRECTORY)
+
+set(cuda_sources
+ ${devicertl_base_directory}/common/src/cancel.cu
+ ${devicertl_base_directory}/common/src/critical.cu)
+
+set(h_files
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
+ ${devicertl_base_directory}/common/debug.h
+ ${devicertl_base_directory}/common/device_environment.h
+ ${devicertl_base_directory}/common/state-queue.h
+ ${devicertl_base_directory}/common/state-queuei.h
+ ${devicertl_base_directory}/common/support.h)
+
+# for both in-tree and out-of-tree build
+if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
+ set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR})
+else()
+ set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
+endif()
+
+# create libraries
+set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
+if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
+ set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
+endif()
+
+macro(add_cuda_bc_library)
+ set(cu_cmd ${AOMP_BINDIR}/clang++
+ -std=c++11
+ -fcuda-rdc
+ -fvisibility=default
+ --cuda-device-only
+ -Wno-unused-value
+ -x hip
+ -O${optimization_level}
+ --cuda-gpu-arch=${mcpu}
+ ${CUDA_DEBUG}
+ -I${CMAKE_CURRENT_SOURCE_DIR}/src
+ -I${devicertl_base_directory})
+
+ set(bc1_files)
+
+ foreach(file ${ARGN})
+ get_filename_component(fname ${file} NAME_WE)
+ set(bc1_filename ${fname}.${mcpu}.bc)
+
+ add_custom_command(
+ OUTPUT ${bc1_filename}
+ COMMAND ${cu_cmd} ${file} -o ${bc1_filename}
+ DEPENDS ${file} ${h_files})
+
+ list(APPEND bc1_files ${bc1_filename})
+ endforeach()
+
+ add_custom_command(
+ OUTPUT linkout.cuda.${mcpu}.bc
+ COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc
+ DEPENDS ${bc1_files})
+
+ list(APPEND bc_files linkout.cuda.${mcpu}.bc)
+endmacro()
+
+set(libname "omptarget-amdgcn")
+
+foreach(mcpu ${mcpus})
+ set(bc_files)
+ add_cuda_bc_library(${cuda_sources})
+
+ set(bc_libname lib${libname}-${mcpu}.bc)
+ add_custom_command(
+ OUTPUT ${bc_libname}
+ COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname}
+ DEPENDS ${bc_files})
+
+ add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})
+
+ install(FILES ${OUTPUTDIR}/${bc_libname}
+ DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice"
+ )
+endforeach()
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b2d06ab4b73..c6e082c2b96 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -72,8 +72,6 @@ EXTERN uint64_t __lanemask_lt();
// thread's lane number in the warp
EXTERN uint64_t __lanemask_gt();
-EXTERN void llvm_amdgcn_s_barrier();
-
// CU id
EXTERN unsigned __smid();
@@ -101,25 +99,21 @@ INLINE uint32_t __kmpc_impl_smid() {
return __smid();
}
-INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); }
+INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
-INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); }
+INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
return __ballot64(1);
}
-INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
- int32_t SrcLane) {
- return __shfl(Var, SrcLane, WARPSIZE);
-}
+EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
+ int32_t SrcLane);
-INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
- uint32_t Delta, int32_t Width) {
- return __shfl_down(Var, Delta, Width);
-}
+EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
+ uint32_t Delta, int32_t Width);
-INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); }
+INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
// we have protected the master warp from releasing from its barrier
@@ -128,4 +122,15 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
__builtin_amdgcn_s_barrier();
}
+// DEVICE versions of part of libc
+extern "C" {
+DEVICE __attribute__((noreturn)) void
+__assertfail(const char *, const char *, unsigned, const char *, size_t);
+INLINE static void __assert_fail(const char *__message, const char *__file,
+ unsigned int __line, const char *__function) {
+ __assertfail(__message, __file, __line, __function, sizeof(char));
+}
+DEVICE int printf(const char *, ...);
+}
+
#endif
diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h
index 3388b04616f..8bb4e3a6dd0 100644
--- a/openmp/libomptarget/deviceRTLs/common/debug.h
+++ b/openmp/libomptarget/deviceRTLs/common/debug.h
@@ -28,7 +28,7 @@
#ifndef _OMPTARGET_NVPTX_DEBUG_H_
#define _OMPTARGET_NVPTX_DEBUG_H_
-#include "device_environment.h"
+#include "common/device_environment.h"
////////////////////////////////////////////////////////////////////////////////
// set desired level of debugging
@@ -128,7 +128,7 @@
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
#include <stdio.h>
-#include "support.h"
+#include "common/support.h"
template <typename... Arguments>
NOINLINE static void log(const char *fmt, Arguments... parameters) {
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h b/openmp/libomptarget/deviceRTLs/common/device_environment.h
index b2f65af354a..68a7757d204 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h
+++ b/openmp/libomptarget/deviceRTLs/common/device_environment.h
@@ -19,6 +19,6 @@ struct omptarget_device_environmentTy {
int32_t debug_level;
};
-extern __device__ omptarget_device_environmentTy omptarget_device_environment;
+extern DEVICE omptarget_device_environmentTy omptarget_device_environment;
#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
index 8cffd91c9f3..8cffd91c9f3 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
index 181bceb3e17..46ed8f4ef34 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "omptarget-nvptx.h"
-#include "device_environment.h"
+#include "common/device_environment.h"
////////////////////////////////////////////////////////////////////////////////
// global device environment
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index a446e005c32..51e88adee6d 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -24,7 +24,7 @@
#include "common/debug.h" // debug
#include "interface.h" // interfaces with omp, compiler, and user
#include "common/state-queue.h"
-#include "support.h"
+#include "common/support.h"
#define OMPTARGET_NVPTX_VERSION 1.1
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
index c8ac493459c..b5efa632b00 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "support.h"
+#include "common/support.h"
#include "common/debug.h"
#include "omptarget-nvptx.h"
OpenPOWER on IntegriCloud