summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGenCUDA
diff options
context:
space:
mode:
authorYaxun Liu <Yaxun.Liu@amd.com>2018-10-02 17:48:54 +0000
committerYaxun Liu <Yaxun.Liu@amd.com>2018-10-02 17:48:54 +0000
commit9767089d003b52ec9d32b93c8533c815c9906902 (patch)
treee9788366c4929488c5cc4c31054fa4ebc749b0f9 /clang/test/CodeGenCUDA
parent2b5259afb3ce81334fab0f4d7a741b85f465caf7 (diff)
downloadbcm5719-llvm-9767089d003b52ec9d32b93c8533c815c9906902.tar.gz
bcm5719-llvm-9767089d003b52ec9d32b93c8533c815c9906902.zip
[HIP] Support early finalization of device code for -fno-gpu-rdc
This patch renames -f{no-}cuda-rdc to -f{no-}gpu-rdc and keeps the original options as aliases. When -fgpu-rdc is off, clang will assume the device code in each translation unit does not call external functions except those in the device library, therefore it is possible to compile the device code in each translation unit to self-contained kernels and embed them in the host object, so that the host object behaves like usual host object which can be linked by lld. The benefits of this feature is: 1. allow users to create static libraries which can be linked by host linker; 2. amortized device code linking time. This patch modifies HIP action builder to insert actions for linking device code and generating HIP fatbin, and pass HIP fatbin to host backend action. It extracts code for constructing command for generating HIP fatbin as a function so that it can be reused by early finalization. It also modifies codegen of HIP host constructor functions to embed the device fatbin when it is available. Differential Revision: https://reviews.llvm.org/D52377 llvm-svn: 343611
Diffstat (limited to 'clang/test/CodeGenCUDA')
-rw-r--r--clang/test/CodeGenCUDA/device-stub.cu20
1 files changed, 11 insertions, 9 deletions
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 06877da8588..7abb7ae3a03 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -6,22 +6,22 @@
// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \
+// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP
+// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
-// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,NORDC,HIP,HIPNEF
#include "Inputs/cuda.h"
@@ -64,8 +64,9 @@ void use_pointers() {
// * constant unnamed string with the kernel name
// ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00"
// * constant unnamed string with GPU binary
-// HIP: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
+// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
+// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
// CUDANORDC-SAME: section ".nv_fatbin", align 8
// CUDARDC-SAME: section "__nv_relfatbin", align 8
// * constant struct that wraps GPU binary
@@ -74,13 +75,14 @@ void use_pointers() {
// CUDA-SAME: { i32 1180844977, i32 1,
// HIP-SAME: { i32 1212764230, i32 1,
// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
-// HIP-SAME: i8* @[[FATBIN]],
+// HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
+// HIPNEF-SAME: i8* @[[FATBIN]],
// ALL-SAME: i8* null }
// CUDA-SAME: section ".nvFatBinSegment"
// HIP-SAME: section ".hipFatBinSegment"
// * variable to save GPU binary handle after initialization
// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
-// HIP: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
+// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
// * constant unnamed string with NVModuleID
// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
@@ -157,7 +159,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
// device-side globals, but we still need to register GPU binary.
// Skip GPU binary string first.
// CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
-// HIPNOGLOBALS: @{{.*}} = external constant{{.*}}
+// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}}
// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
OpenPOWER on IntegriCloud