[cuda] Driver changes to compile and stitch together host and device-side CUDA code.

NOTE: reverts r242077 to reinstate r242058, r242065, r242067 and includes a fix for OS X test failures. - Changed driver pipeline to compile host and device side of CUDA files and incorporate results of device-side compilation into host object file. - Added a test for CUDA pipeline creation in clang driver. New clang options: --cuda-host-only - Do host-side compilation only. --cuda-device-only - Do device-side compilation only. --cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side compilation. E.g. sm_35, sm_30. Default is sm_20. May be used more than once in which case one device-compilation will be done per unique specified GPU architecture. Differential Revision: http://reviews.llvm.org/D9509 llvm-svn: 242085
author: Artem Belevich <tra@google.com> 2015-07-13 23:27:56 +0000
committer: Artem Belevich <tra@google.com> 2015-07-13 23:27:56 +0000
commit: 0ff05cd1651474678e8f503cae4956f0c342bf67 (patch)
tree: 98c608f18a4fd854516da066943e603c71280988 /clang/test
parent: 2eacca86ef2c255bae8eff43056dfa6c57ae7092 (diff)
download: bcm5719-llvm-0ff05cd1651474678e8f503cae4956f0c342bf67.tar.gz
bcm5719-llvm-0ff05cd1651474678e8f503cae4956f0c342bf67.zip
3 files changed, 130 insertions, 10 deletions
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
new file mode 100644
index 00000000000..ecb3f228eab
--- /dev/null
+++ b/clang/test/Driver/cuda-options.cu
@@ -0,0 +1,109 @@
+// Tests CUDA compilation pipeline construction in Driver.
+// REQUIRES: clang-driver
+
+// Simple compilation case:
+// RUN: %clang -### -c %s 2>&1 \
+// Compile device-side to PTX assembly and make sure we use it on the host side.
+// RUN:   | FileCheck -check-prefix CUDA-D1 \
+// Then compile host side and incorporate device code.
+// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
+// Make sure we don't link anything.
+// RUN:   -check-prefix CUDA-NL %s
+
+// Typical compilation + link case:
+// RUN: %clang -### %s 2>&1 \
+// Compile device-side to PTX assembly and make sure we use it on the host side
+// RUN:   | FileCheck -check-prefix CUDA-D1 \
+// Then compile host side and incorporate device code.
+// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
+// Then link things.
+// RUN:   -check-prefix CUDA-L %s
+
+// Verify that --cuda-host-only disables device-side compilation and linking
+// RUN: %clang -### --cuda-host-only %s 2>&1 \
+// Make sure we didn't run device-side compilation.
+// RUN:   | FileCheck -check-prefix CUDA-ND \
+// Then compile host side and make sure we don't attempt to incorporate GPU code.
+// RUN:    -check-prefix CUDA-H -check-prefix CUDA-H-NI \
+// Make sure we don't link anything.
+// RUN:    -check-prefix CUDA-NL %s
+
+// Verify that --cuda-device-only disables host-side compilation and linking
+// RUN: %clang -### --cuda-device-only %s 2>&1 \
+// Compile device-side to PTX assembly
+// RUN:   | FileCheck -check-prefix CUDA-D1 \
+// Make sure there is no host compilation or linking.
+// RUN:   -check-prefix CUDA-NH -check-prefix CUDA-NL %s
+
+// Verify that with -S we compile host and device sides to assembly
+// and incorporate device code on the host side.
+// RUN: %clang -### -S -c %s 2>&1 \
+// Compile device-side to PTX assembly
+// RUN:   | FileCheck -check-prefix CUDA-D1 \
+// Then compile host side and incorporate GPU code.
+// RUN:  -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
+// Make sure we don't link anything.
+// RUN:  -check-prefix CUDA-NL %s
+
+// Verify that --cuda-gpu-arch option passes correct GPU
+// architecture info to device compilation.
+// RUN: %clang -### --cuda-gpu-arch=sm_35 -c %s 2>&1 \
+// Compile device-side to PTX assembly.
+// RUN:   | FileCheck -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \
+// Then compile host side and incorporate GPU code.
+// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 \
+// Make sure we don't link anything.
+// RUN:   -check-prefix CUDA-NL %s
+
+// Verify that there is device-side compilation per --cuda-gpu-arch args
+// and that all results are included on the host side.
+// RUN: %clang -### --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
+// Compile both device-sides to PTX assembly
+// RUN:   | FileCheck \
+// RUN: -check-prefix CUDA-D1 -check-prefix CUDA-D1-SM35 \
+// RUN: -check-prefix CUDA-D2 -check-prefix CUDA-D2-SM30 \
+// Then compile host side and incorporate both device-side outputs
+// RUN:   -check-prefix CUDA-H -check-prefix CUDA-H-I1 -check-prefix CUDA-H-I2 \
+// Make sure we don't link anything.
+// RUN:   -check-prefix CUDA-NL %s
+
+// Match device-side compilation
+// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D1-SAME: "-fcuda-is-device"
+// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
+// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
+// CUDA-D1-SAME: "-x" "cuda"
+
+// Match another device-side compilation
+// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D2-SAME: "-fcuda-is-device"
+// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
+// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
+// CUDA-D2-SAME: "-x" "cuda"
+
+// Match no device-side compilation
+// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{64?}}-nvidia-cuda"
+// CUDA-ND-SAME-NOT: "-fcuda-is-device"
+
+// Match host-side compilation
+// CUDA-H: "-cc1" "-triple"
+// CUDA-H-SAME-NOT: "nvptx{{64?}}-nvidia-cuda"
+// CUDA-H-SAME-NOT: "-fcuda-is-device"
+// CUDA-H-SAME: "-o" "[[HOSTOBJ:[^"]*]]"
+// CUDA-H-SAME: "-x" "cuda"
+// CUDA-H-I1-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY1]]"
+// CUDA-H-I2-SAME: "-fcuda-include-gpubinary" "[[GPUBINARY2]]"
+
+// Match no GPU code inclusion.
+// CUDA-H-NI-NOT: "-fcuda-include-gpubinary"
+
+// Match no host-side CUDA compilation
+// CUDA-NH-NOT: "-cc1" "-triple"
+// CUDA-NH-SAME-NOT: "-x" "cuda"
+
+// Match linker
+// CUDA-L: "{{.*}}ld{{(.exe)?}}"
+// CUDA-L-SAME: "[[HOSTOBJ]]"
+
+// Match no linker
+// CUDA-NL-NOT: "{{.*}}ld{{(.exe)?}}"
diff --git a/clang/test/Index/attributes-cuda.cu b/clang/test/Index/attributes-cuda.cu
index 824bdb4c883..51f4aedd198 100644
--- a/clang/test/Index/attributes-cuda.cu
+++ b/clang/test/Index/attributes-cuda.cu
@@ -1,4 +1,6 @@
 // RUN: c-index-test -test-load-source all -x cuda %s | FileCheck %s
+// RUN: c-index-test -test-load-source all -x cuda --cuda-host-only %s | FileCheck %s
+// RUN: c-index-test -test-load-source all -x cuda --cuda-device-only %s | FileCheck %s
 
 __attribute__((device)) void f_device();
 __attribute__((global)) void f_global();
@@ -6,13 +8,13 @@ __attribute__((constant)) int* g_constant;
 __attribute__((shared)) float *g_shared;
 __attribute__((host)) void f_host();
 
-// CHECK:       attributes-cuda.cu:3:30: FunctionDecl=f_device:3:30
-// CHECK-NEXT:  attributes-cuda.cu:3:16: attribute(device)
-// CHECK:       attributes-cuda.cu:4:30: FunctionDecl=f_global:4:30
-// CHECK-NEXT:  attributes-cuda.cu:4:16: attribute(global)
-// CHECK:       attributes-cuda.cu:5:32: VarDecl=g_constant:5:32 (Definition)
-// CHECK-NEXT:  attributes-cuda.cu:5:16: attribute(constant)
-// CHECK:       attributes-cuda.cu:6:32: VarDecl=g_shared:6:32 (Definition)
-// CHECK-NEXT:  attributes-cuda.cu:6:16: attribute(shared)
-// CHECK:       attributes-cuda.cu:7:28: FunctionDecl=f_host:7:28
-// CHECK-NEXT:  attributes-cuda.cu:7:16: attribute(host)
+// CHECK:       attributes-cuda.cu:5:30: FunctionDecl=f_device:5:30
+// CHECK-NEXT:  attributes-cuda.cu:5:16: attribute(device)
+// CHECK:       attributes-cuda.cu:6:30: FunctionDecl=f_global:6:30
+// CHECK-NEXT:  attributes-cuda.cu:6:16: attribute(global)
+// CHECK:       attributes-cuda.cu:7:32: VarDecl=g_constant:7:32 (Definition)
+// CHECK-NEXT:  attributes-cuda.cu:7:16: attribute(constant)
+// CHECK:       attributes-cuda.cu:8:32: VarDecl=g_shared:8:32 (Definition)
+// CHECK-NEXT:  attributes-cuda.cu:8:16: attribute(shared)
+// CHECK:       attributes-cuda.cu:9:28: FunctionDecl=f_host:9:28
+// CHECK-NEXT:  attributes-cuda.cu:9:16: attribute(host)
diff --git a/clang/test/Index/index-file.cu b/clang/test/Index/index-file.cu
new file mode 100644
index 00000000000..26b93f06945
--- /dev/null
+++ b/clang/test/Index/index-file.cu
@@ -0,0 +1,9 @@
+// Make sure we can process CUDA file even if driver creates multiple jobs
+// RUN: c-index-test -test-load-source all %s | FileCheck %s -check-prefix=CHECK-ANY
+// Make sure we process correct side of cuda compilation
+// RUN: c-index-test -test-load-source all --cuda-host-only %s | FileCheck %s -check-prefix=CHECK-HOST
+// RUN: c-index-test -test-load-source all --cuda-device-only %s | FileCheck %s -check-prefix=CHECK-DEVICE
+
+// CHECK-ANY: macro definition=__cplusplus
+// CHECK-HOST-NOT: macro definition=__CUDA_ARCH__
+// CHECK-DEVICE: macro definition=__CUDA_ARCH__
author	Artem Belevich <tra@google.com>	2015-07-13 23:27:56 +0000
committer	Artem Belevich <tra@google.com>	2015-07-13 23:27:56 +0000
commit	0ff05cd1651474678e8f503cae4956f0c342bf67 (patch)
tree	98c608f18a4fd854516da066943e603c71280988 /clang/test
parent	2eacca86ef2c255bae8eff43056dfa6c57ae7092 (diff)
download	bcm5719-llvm-0ff05cd1651474678e8f503cae4956f0c342bf67.tar.gz bcm5719-llvm-0ff05cd1651474678e8f503cae4956f0c342bf67.zip