Diffstat (limited to 'clang/test')
-rw-r--r-- | clang/test/CodeGenCUDA/Inputs/cuda.h            | 13
-rw-r--r-- | clang/test/CodeGenCUDA/device-stub.cu           | 65
-rw-r--r-- | clang/test/CodeGenCUDA/kernel-args-alignment.cu | 16
-rw-r--r-- | clang/test/CodeGenCUDA/kernel-call.cu           | 17
-rw-r--r-- | clang/test/Driver/cuda-simple.cu                |  6
-rw-r--r-- | clang/test/SemaCUDA/Inputs/cuda.h               | 14
-rw-r--r-- | clang/test/SemaCUDA/config-type.cu              |  8
7 files changed, 106 insertions(+), 33 deletions(-)
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index 3adbdc5b6d1..0fd175765a2 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -15,13 +15,20 @@ struct dim3 {
 };
 
 typedef struct cudaStream *cudaStream_t;
-
+typedef enum cudaError {} cudaError_t;
 #ifdef __HIP__
 int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
                      cudaStream_t stream = 0);
 #else
-int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
-                      cudaStream_t stream = 0);
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+                                 size_t sharedSize = 0,
+                                 cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+                                           size_t sharedSize = 0,
+                                           cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+                                        dim3 blockDim, void **args,
+                                        size_t sharedMem, cudaStream_t stream);
 #endif
 
 extern "C" __device__ int printf(const char*, ...);
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index ea45c391d20..30f88389424 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -1,14 +1,36 @@
 // RUN: echo "GPU binary would be here" > %t
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN:     -fcuda-include-gpubinary %t -o - \
-// RUN:     | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,CUDA,CUDANORDC
+// RUN:     -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \
+// RUN:     | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN:       --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-OLD
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN:     -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
-// RUN:     | FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS
+// RUN:     -target-sdk-version=8.0 -fcuda-include-gpubinary %t \
+// RUN:     -o - -DNOGLOBALS \
+// RUN:     | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN:       -check-prefixes=NOGLOBALS,CUDANOGLOBALS
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
-// RUN:     -fgpu-rdc -fcuda-include-gpubinary %t -o - \
-// RUN:     | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
+// RUN:     -target-sdk-version=8.0 -fgpu-rdc -fcuda-include-gpubinary %t \
+// RUN:     -o - \
+// RUN:     | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN:       --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA-OLD
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:     -target-sdk-version=8.0 -o - \
+// RUN:     | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:     -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - \
+// RUN:     | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN:       --check-prefixes=ALL,NORDC,CUDA,CUDANORDC,CUDA-NEW
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:     -target-sdk-version=9.2 -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
+// RUN:     | FileCheck -allow-deprecated-dag-overlap %s \
+// RUN:       --check-prefixes=NOGLOBALS,CUDANOGLOBALS
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
+// RUN:     | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN:       --check-prefixes=ALL,RDC,CUDA,CUDARDC,CUDA_NEW
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
+// RUN:     -target-sdk-version=9.2 -o - \
 // RUN:     | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
 
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
@@ -103,15 +125,34 @@ void use_pointers() {
 // by a call to cudaLaunch.
 
 // ALL: define{{.*}}kernelfunc
-// ALL: call{{.*}}[[PREFIX]]SetupArgument
-// ALL: call{{.*}}[[PREFIX]]SetupArgument
-// ALL: call{{.*}}[[PREFIX]]SetupArgument
-// ALL: call{{.*}}[[PREFIX]]Launch
+
+// New launch sequence stores arguments into local buffer and passes array of
+// pointers to them directly to cudaLaunchKernel
+// CUDA-NEW: alloca
+// CUDA-NEW: store
+// CUDA-NEW: store
+// CUDA-NEW: store
+// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
+// CUDA-NEW: call{{.*}}cudaLaunchKernel
+
+// Legacy style launch sequence sets up arguments by passing them to
+// [cuda|hip]SetupArgument.
+// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
+// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
+// CUDA-OLD: call{{.*}}[[PREFIX]]SetupArgument
+// CUDA-OLD: call{{.*}}[[PREFIX]]Launch
+
+// HIP: call{{.*}}[[PREFIX]]SetupArgument
+// HIP: call{{.*}}[[PREFIX]]SetupArgument
+// HIP: call{{.*}}[[PREFIX]]SetupArgument
+// HIP: call{{.*}}[[PREFIX]]Launch
 __global__ void kernelfunc(int i, int j, int k) {}
 
 // Test that we've built correct kernel launch sequence.
 // ALL: define{{.*}}hostfunc
-// ALL: call{{.*}}[[PREFIX]]ConfigureCall
+// CUDA-OLD: call{{.*}}[[PREFIX]]ConfigureCall
+// CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
+// HIP: call{{.*}}[[PREFIX]]ConfigureCall
 // ALL: call{{.*}}kernelfunc
 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 #endif
diff --git a/clang/test/CodeGenCUDA/kernel-args-alignment.cu b/clang/test/CodeGenCUDA/kernel-args-alignment.cu
index 4bd5eb1bb1f..653f3eb23d2 100644
--- a/clang/test/CodeGenCUDA/kernel-args-alignment.cu
+++ b/clang/test/CodeGenCUDA/kernel-args-alignment.cu
@@ -1,8 +1,12 @@
-// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
-// RUN:   FileCheck -check-prefix HOST -check-prefix CHECK %s
+// New CUDA kernel launch sequence does not require explicit specification of
+// size/offset for each argument, so only the old way is tested.
+//
+// RUN: %clang_cc1 --std=c++11 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -target-sdk-version=8.0 -o - %s \
+// RUN:   | FileCheck -check-prefixes=HOST-OLD,CHECK %s
 
 // RUN: %clang_cc1 --std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda \
-// RUN:   -emit-llvm -o - %s | FileCheck -check-prefix DEVICE -check-prefix CHECK %s
+// RUN:   -emit-llvm -o - %s | FileCheck -check-prefixes=DEVICE,CHECK %s
 
 #include "Inputs/cuda.h"
 
@@ -27,9 +31,9 @@ static_assert(alignof(S) == 8, "Unexpected alignment.");
 // 1. offset 0, width 1
 // 2. offset 8 (because alignof(S) == 8), width 16
 // 3. offset 24, width 8
-// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 1, i64 0)
-// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 16, i64 8)
-// HOST: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
+// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 1, i64 0)
+// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 16, i64 8)
+// HOST-OLD: call i32 @cudaSetupArgument({{[^,]*}}, i64 8, i64 24)
 
 // DEVICE-LABEL: @_Z6kernelc1SPi
 // DEVICE-SAME: i8{{[^,]*}}, %struct.S* byval align 8{{[^,]*}}, i32*
diff --git a/clang/test/CodeGenCUDA/kernel-call.cu b/clang/test/CodeGenCUDA/kernel-call.cu
index 43d08dfaf89..ed48a6cc813 100644
--- a/clang/test/CodeGenCUDA/kernel-call.cu
+++ b/clang/test/CodeGenCUDA/kernel-call.cu
@@ -1,5 +1,9 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK
-// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECK
+// RUN: %clang_cc1 -target-sdk-version=8.0 -emit-llvm %s -o - \
+// RUN:   | FileCheck %s --check-prefixes=CUDA-OLD,CHECK
+// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \
+// RUN:   | FileCheck %s --check-prefixes=CUDA-NEW,CHECK
+// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \
+// RUN:   | FileCheck %s --check-prefixes=HIP,CHECK
 
 #include "Inputs/cuda.h"
 
@@ -7,14 +11,17 @@
 // CHECK-LABEL: define{{.*}}g1
 // HIP: call{{.*}}hipSetupArgument
 // HIP: call{{.*}}hipLaunchByPtr
-// CUDA: call{{.*}}cudaSetupArgument
-// CUDA: call{{.*}}cudaLaunch
+// CUDA-OLD: call{{.*}}cudaSetupArgument
+// CUDA-OLD: call{{.*}}cudaLaunch
+// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration
+// CUDA-NEW: call{{.*}}cudaLaunchKernel
 __global__ void g1(int x) {}
 
 // CHECK-LABEL: define{{.*}}main
 int main(void) {
   // HIP: call{{.*}}hipConfigureCall
-  // CUDA: call{{.*}}cudaConfigureCall
+  // CUDA-OLD: call{{.*}}cudaConfigureCall
+  // CUDA-NEW: call{{.*}}__cudaPushCallConfiguration
   // CHECK: icmp
   // CHECK: br
   // CHECK: call{{.*}}g1
diff --git a/clang/test/Driver/cuda-simple.cu b/clang/test/Driver/cuda-simple.cu
index fbc5aa14133..b6840be4e2e 100644
--- a/clang/test/Driver/cuda-simple.cu
+++ b/clang/test/Driver/cuda-simple.cu
@@ -2,7 +2,7 @@
 // http://llvm.org/PR22936
 // RUN: %clang -nocudainc -nocudalib -Werror -fsyntax-only -c %s
 //
-// Verify that we pass -x cuda-cpp-output to compiler after 
+// Verify that we pass -x cuda-cpp-output to compiler after
 // preprocessing a CUDA file
 // RUN: %clang -Werror -### -save-temps -c %s 2>&1 | FileCheck %s
 // CHECK: "-cc1"
@@ -14,7 +14,9 @@
 // Verify that compiler accepts CUDA syntax with "-x cuda-cpp-output".
 // RUN: %clang -Werror -fsyntax-only -x cuda-cpp-output -c %s
 
-int cudaConfigureCall(int, int);
+extern "C" int cudaConfigureCall(int, int);
+extern "C" int __cudaPushCallConfiguration(int, int);
+
 __attribute__((global)) void kernel() {}
 
 void func() {
diff --git a/clang/test/SemaCUDA/Inputs/cuda.h b/clang/test/SemaCUDA/Inputs/cuda.h
index 4544369411f..2600bfa9c47 100644
--- a/clang/test/SemaCUDA/Inputs/cuda.h
+++ b/clang/test/SemaCUDA/Inputs/cuda.h
@@ -18,9 +18,17 @@ struct dim3 {
 };
 
 typedef struct cudaStream *cudaStream_t;
-
-int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
-                      cudaStream_t stream = 0);
+typedef enum cudaError {} cudaError_t;
+
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+                                 size_t sharedSize = 0,
+                                 cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+                                           size_t sharedSize = 0,
+                                           cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+                                        dim3 blockDim, void **args,
+                                        size_t sharedMem, cudaStream_t stream);
 
 // Host- and device-side placement new overloads.
 void *operator new(__SIZE_TYPE__, void *p) { return p; }
diff --git a/clang/test/SemaCUDA/config-type.cu b/clang/test/SemaCUDA/config-type.cu
index a469d38d3e9..a122c4539a3 100644
--- a/clang/test/SemaCUDA/config-type.cu
+++ b/clang/test/SemaCUDA/config-type.cu
@@ -1,3 +1,7 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -target-sdk-version=8.0 -fsyntax-only -verify=legacy-launch %s
+// RUN: %clang_cc1 -target-sdk-version=9.2 -fsyntax-only -verify=new-launch %s
 
-void cudaConfigureCall(unsigned gridSize, unsigned blockSize); // expected-error {{must have scalar return type}}
+// legacy-launch-error@+1 {{must have scalar return type}}
+void cudaConfigureCall(unsigned gridSize, unsigned blockSize);
+// new-launch-error@+1 {{must have scalar return type}}
+void __cudaPushCallConfiguration(unsigned gridSize, unsigned blockSize);
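
Editor's note: the following is a minimal, hand-written sketch of the two host-side launch sequences the CHECK lines above are matching. It is not the code clang actually emits (the tests only match call names in the IR), and the cudaSetupArgument, cudaLaunch, and __cudaPopCallConfiguration prototypes plus the argument offsets are assumptions added here for illustration; only the declarations that appear in the test cuda.h inputs above are taken from the diff. It compiles as host C++ but will not link without a CUDA runtime.

// Declarations mirroring the test Inputs/cuda.h headers.
#include <cstddef>

struct dim3 {
  unsigned x, y, z;
  dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
};
typedef struct cudaStream *cudaStream_t;
typedef enum cudaError {} cudaError_t;

// Legacy launch API (pre-9.2 style).
extern "C" int cudaConfigureCall(dim3 grid, dim3 block,
                                 std::size_t sharedMem = 0,
                                 cudaStream_t stream = 0);
extern "C" int cudaSetupArgument(const void *arg, std::size_t size,
                                 std::size_t offset);            // assumed prototype
extern "C" int cudaLaunch(const void *func);                     // assumed prototype

// New launch API (CUDA 9.2+).
extern "C" int __cudaPushCallConfiguration(dim3 grid, dim3 block,
                                           std::size_t sharedMem = 0,
                                           cudaStream_t stream = 0);
extern "C" int __cudaPopCallConfiguration(dim3 *grid, dim3 *block,
                                          std::size_t *sharedMem,
                                          void *stream);         // assumed prototype
extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 grid,
                                        dim3 block, void **args,
                                        std::size_t sharedMem,
                                        cudaStream_t stream);

// Roughly what the stub for __global__ void kernelfunc(int i, int j, int k)
// does under -target-sdk-version=8.0: copy each argument into the launch
// buffer with an explicit size and offset, then launch the kernel identified
// by the stub's own address.
void kernelfunc_stub_old(int i, int j, int k) {
  if (cudaSetupArgument(&i, sizeof(i), 0)) return;
  if (cudaSetupArgument(&j, sizeof(j), 4)) return;
  if (cudaSetupArgument(&k, sizeof(k), 8)) return;
  cudaLaunch((const void *)kernelfunc_stub_old);
}

// Roughly what the stub does under -target-sdk-version=9.2: collect pointers
// to the arguments in a local array, pop the configuration that
// __cudaPushCallConfiguration recorded at the <<<...>>> call site, and hand
// everything to cudaLaunchKernel in a single call.
void kernelfunc_stub_new(int i, int j, int k) {
  void *args[] = {&i, &j, &k};
  dim3 grid(1), block(1);
  std::size_t sharedMem = 0;
  cudaStream_t stream = 0;
  __cudaPopCallConfiguration(&grid, &block, &sharedMem, &stream);
  cudaLaunchKernel((const void *)kernelfunc_stub_new, grid, block, args,
                   sharedMem, stream);
}

The practical difference the tests encode: the legacy path issues one runtime call per argument plus a configure and a launch call, while the 9.2+ path funnels the whole launch into a push/pop of the call configuration and a single cudaLaunchKernel. That is also why kernel-args-alignment.cu only checks per-argument size/offset values for the old sequence.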