diff options
author | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-05-18 15:07:56 +0000 |
---|---|---|
committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2018-05-18 15:07:56 +0000 |
commit | 29155b01c13d6ee67af9a8a6fd3b0c3b444ed306 (patch) | |
tree | a8af55686b5866c54471979afe280704f480ee76 /clang/test/CodeGenCUDA | |
parent | 655ef1875b92266c8ad81bf2b28ad92350894ca9 (diff) | |
download | bcm5719-llvm-29155b01c13d6ee67af9a8a6fd3b0c3b444ed306.tar.gz bcm5719-llvm-29155b01c13d6ee67af9a8a6fd3b0c3b444ed306.zip |
[HIP] Support offloading by linker script
To support linking device code in different source files, it is necessary to
embed fat binary at host linking stage.
This patch emits an external symbol for fat binary in host codegen, then
embed the fat binary by lld through a linker script.
Differential Revision: https://reviews.llvm.org/D46472
llvm-svn: 332724
Diffstat (limited to 'clang/test/CodeGenCUDA')
-rw-r--r-- | clang/test/CodeGenCUDA/device-stub.cu | 40 |
1 files changed, 23 insertions, 17 deletions
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu index 8339d872ad9..894b7205c30 100644 --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -1,13 +1,13 @@ // RUN: echo "GPU binary would be here" > %t // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - \ -// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,CUDA +// RUN: | FileCheck %s --check-prefixes=ALL,NORDC,CUDA,CUDANORDC // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \ -// RUN: | FileCheck %s -check-prefix=NOGLOBALS +// RUN: | FileCheck %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \ -// RUN: | FileCheck %s --check-prefixes=ALL,RDC,CUDA +// RUN: | FileCheck %s --check-prefixes=ALL,RDC,CUDA,CUDARDC // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \ // RUN: | FileCheck %s -check-prefix=NOGPUBIN @@ -16,10 +16,10 @@ // RUN: | FileCheck %s --check-prefixes=ALL,NORDC,HIP // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \ -// RUN: | FileCheck %s -check-prefix=NOGLOBALS +// RUN: | FileCheck %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ // RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \ -// RUN: | FileCheck %s --check-prefixes=ALL,RDC,HIP +// RUN: | FileCheck %s --check-prefixes=ALL,RDC,HIP,HIPRDC // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\ // RUN: | FileCheck %s -check-prefix=NOGPUBIN @@ -64,21 +64,26 @@ void use_pointers() { // * constant unnamed string with the kernel name // ALL: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00" // * constant unnamed string with GPU binary -// ALL: private unnamed_addr constant{{.*GPU binary would be here.*}}\00" -// NORDC-SAME: section ".nv_fatbin", align 8 -// RDC-SAME: section "__nv_relfatbin", align 8 +// HIP: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin" +// CUDA: @[[FATBIN:.*]] = private unnamed_addr constant{{.*GPU binary would be here.*}}\00", +// CUDANORDC-SAME: section ".nv_fatbin", align 8 +// CUDARDC-SAME: section "__nv_relfatbin", align 8 // * constant struct that wraps GPU binary -// CUDA: @__[[PREFIX:cuda]]_fatbin_wrapper = internal constant -// CUDA-SAME: { i32, i32, i8*, i8* } -// HIP: @__[[PREFIX:hip]]_fatbin_wrapper = internal constant -// HIP-SAME: { i32, i32, i8*, i8* } -// ALL-SAME: { i32 1180844977, i32 1, {{.*}}, i8* null } -// ALL-SAME: section ".nvFatBinSegment" +// ALL: @__[[PREFIX:cuda|hip]]_fatbin_wrapper = internal constant +// ALL-SAME: { i32, i32, i8*, i8* } +// CUDA-SAME: { i32 1180844977, i32 1, +// HIP-SAME: { i32 1212764230, i32 1, +// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0), +// HIP-SAME: i8* @[[FATBIN]], +// ALL-SAME: i8* null } +// CUDA-SAME: section ".nvFatBinSegment" +// HIP-SAME: section ".hipFatBinSegment" // * variable to save GPU binary handle after initialization // NORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null // * constant unnamed string with NVModuleID // RDC: [[MODULE_ID_GLOBAL:@.*]] = private unnamed_addr constant -// RDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32 +// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32 +// HIPRDC-SAME: c"[[MODULE_ID:.+]]\00", section "__hip_module_id", align 32 // * Make sure our constructor was added to global ctor list. // ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor // * In separate mode we also register a destructor. @@ -136,9 +141,10 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // There should be no __[[PREFIX]]_register_globals if we have no // device-side globals, but we still need to register GPU binary. // Skip GPU binary string first. -// NOGLOBALS: @0 = private unnamed_addr constant{{.*}} +// CUDANOGLOBALS: @{{.*}} = private unnamed_addr constant{{.*}} +// HIPNOGLOBALS: @{{.*}} = external constant{{.*}} // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals -// NOGLOBALS: define internal void @__[[PREFIX:.*]]_module_ctor +// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor // NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals // NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor |