diff options
Diffstat (limited to 'openmp')
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt | 136 | ||||
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/amdgcn/src/device_environment.h | 27 | ||||
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 31 | ||||
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/support.h (renamed from openmp/libomptarget/deviceRTLs/common/support.h) | 0 |
5 files changed, 14 insertions, 183 deletions
diff --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt index 8bbf987aaf2..9723fb8cde3 100644 --- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt @@ -6,9 +6,8 @@ # # ##===----------------------------------------------------------------------===## # -# Build a device RTL for each available machine. +# Build a device RTL for each available machine available. # ##===----------------------------------------------------------------------===## -add_subdirectory(amdgcn) add_subdirectory(nvptx) diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt deleted file mode 100644 index fe117bfbdbc..00000000000 --- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt +++ /dev/null @@ -1,136 +0,0 @@ -##===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -##===----------------------------------------------------------------------===## -# -# Build the AMDGCN Device RTL if the ROCM tools are available -# -##===----------------------------------------------------------------------===## - -find_package(LLVM QUIET CONFIG - PATHS - $ENV{AOMP} - $ENV{HOME}/rocm/aomp - /opt/rocm/aomp - /usr/lib/rocm/aomp - ${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR} - ${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR} - ${CMAKE_CXX_COMPILER_DIR} - NO_DEFAULT_PATH) - -if (LLVM_DIR) - libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake") -else() - libomptarget_say("Not building AMDGCN device RTL: AOMP not found") - return() -endif() - -set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX}) - -if (AOMP_INSTALL_PREFIX) - set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin) -else() - set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin) -endif() - -libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}") - -project(omptarget-amdgcn) - -add_custom_target(omptarget-amdgcn ALL) - -#optimization level -set(optimization_level 2) - -# Activate RTL message dumps if requested by the user. -if(LIBOMPTARGET_NVPTX_DEBUG) - set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1) -endif() - -get_filename_component(devicertl_base_directory - ${CMAKE_CURRENT_SOURCE_DIR} - DIRECTORY) - -set(cuda_sources - ${devicertl_base_directory}/common/src/cancel.cu - ${devicertl_base_directory}/common/src/critical.cu) - -set(h_files - ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/device_environment.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h - ${devicertl_base_directory}/common/debug.h - ${devicertl_base_directory}/common/state-queue.h - ${devicertl_base_directory}/common/state-queuei.h - ${devicertl_base_directory}/common/support.h) - -# for both in-tree and out-of-tree build -if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY) - set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR}) -else() - set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}) -endif() - -# create libraries -set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900) -if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST) - set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST}) -endif() - -macro(add_cuda_bc_library) - set(cu_cmd ${AOMP_BINDIR}/clang++ - -std=c++11 - -fcuda-rdc - -fvisibility=default - --cuda-device-only - -Wno-unused-value - -x hip - -O${optimization_level} - --cuda-gpu-arch=${mcpu} - ${CUDA_DEBUG} - -I${CMAKE_CURRENT_SOURCE_DIR}/src - -I${devicertl_base_directory}) - - set(bc1_files) - - foreach(file ${ARGN}) - get_filename_component(fname ${file} NAME_WE) - set(bc1_filename ${fname}.${mcpu}.bc) - - add_custom_command( - OUTPUT ${bc1_filename} - COMMAND ${cu_cmd} ${file} -o ${bc1_filename} - DEPENDS ${file} ${h_files}) - - list(APPEND bc1_files ${bc1_filename}) - endforeach() - - add_custom_command( - OUTPUT linkout.cuda.${mcpu}.bc - COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc - DEPENDS ${bc1_files}) - - list(APPEND bc_files linkout.cuda.${mcpu}.bc) -endmacro() - -set(libname "omptarget-amdgcn") - -foreach(mcpu ${mcpus}) - set(bc_files) - add_cuda_bc_library(${cuda_sources}) - - set(bc_libname lib${libname}-${mcpu}.bc) - add_custom_command( - OUTPUT ${bc_libname} - COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname} - DEPENDS ${bc_files}) - - add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname}) - - install(FILES ${OUTPUTDIR}/${bc_libname} - DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice" - ) -endforeach() diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/device_environment.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/device_environment.h deleted file mode 100644 index 71ab15da931..00000000000 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/device_environment.h +++ /dev/null @@ -1,27 +0,0 @@ -//===---- device_environment.h - OpenMP GPU device environment --- CUDA -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Global device environment -// -//===----------------------------------------------------------------------===// - -#ifndef _OMPTARGET_DEVICE_ENVIRONMENT_H_ -#define _OMPTARGET_DEVICE_ENVIRONMENT_H_ - -#include "target_impl.h" - -struct omptarget_device_environmentTy { - int32_t debug_level; // gets value of envvar LIBOMPTARGET_DEVICE_RTL_DEBUG - // only useful for Debug build of deviceRTLs - int32_t num_devices; // gets number of active offload devices - int32_t device_num; // gets a value 0 to num_devices-1 -}; - -extern DEVICE omptarget_device_environmentTy omptarget_device_environment; - -#endif diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index c6e082c2b96..b2d06ab4b73 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -72,6 +72,8 @@ EXTERN uint64_t __lanemask_lt(); // thread's lane number in the warp EXTERN uint64_t __lanemask_gt(); +EXTERN void llvm_amdgcn_s_barrier(); + // CU id EXTERN unsigned __smid(); @@ -99,21 +101,25 @@ INLINE uint32_t __kmpc_impl_smid() { return __smid(); } -INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); } +INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); } -INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); } +INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); } INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() { return __ballot64(1); } -EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var, - int32_t SrcLane); +INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var, + int32_t SrcLane) { + return __shfl(Var, SrcLane, WARPSIZE); +} -EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var, - uint32_t Delta, int32_t Width); +INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var, + uint32_t Delta, int32_t Width) { + return __shfl_down(Var, Delta, Width); +} -INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); } +INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); } INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { // we have protected the master warp from releasing from its barrier @@ -122,15 +128,4 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { __builtin_amdgcn_s_barrier(); } -// DEVICE versions of part of libc -extern "C" { -DEVICE __attribute__((noreturn)) void -__assertfail(const char *, const char *, unsigned, const char *, size_t); -INLINE static void __assert_fail(const char *__message, const char *__file, - unsigned int __line, const char *__function) { - __assertfail(__message, __file, __line, __function, sizeof(char)); -} -DEVICE int printf(const char *, ...); -} - #endif diff --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h index 8cffd91c9f3..8cffd91c9f3 100644 --- a/openmp/libomptarget/deviceRTLs/common/support.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.h |

