summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp11
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h6
-rw-r--r--clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp106
3 files changed, 123 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 6b5591b8b5c..90702a19e4f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -642,6 +642,17 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
}
+void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) {
+ // Do nothing in case of Spmd mode and L0 parallel.
+ // TODO: If in Spmd mode and L1 parallel emit the clause.
+ if (isInSpmdExecutionMode())
+ return;
+
+ CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
+}
+
void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 7a68c76664f..d1ff0e8a24a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -170,6 +170,12 @@ protected:
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+ /// \brief Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
+ /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
+ virtual void emitProcBindClause(CodeGenFunction &CGF,
+ OpenMPProcBindClauseKind ProcBind,
+ SourceLocation Loc) override;
+
/// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
new file mode 100644
index 00000000000..91c6de1b85d
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -0,0 +1,106 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of all 3 target regions on the gpu is set to SPMD Mode.
+// CHECK-DAG: {{@__omp_offloading_.+l22}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 0
+// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 0
+
+template<typename tx>
+tx ftemplate(int n) {
+ tx a = 0;
+ short aa = 0;
+ tx b[10];
+
+ #pragma omp target parallel proc_bind(master)
+ {
+ }
+
+ #pragma omp target parallel proc_bind(spread)
+ {
+ aa += 1;
+ }
+
+ #pragma omp target parallel proc_bind(close)
+ {
+ a += 1;
+ aa += 1;
+ b[2] += 1;
+ }
+
+ return a;
+}
+
+int bar(int n){
+ int a = 0;
+
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l22}}(
+ // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: br label {{%?}}[[EXEC:.+]]
+ //
+ // CHECK: [[EXEC]]
+ // CHECK-NOT: call void @__kmpc_push_proc_bind
+ // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+ // CHECK: br label {{%?}}[[DONE:.+]]
+ //
+ // CHECK: [[DONE]]
+ // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[EXIT]]
+ // CHECK: ret void
+ // CHECK: }
+
+
+
+
+
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}(
+ // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: br label {{%?}}[[EXEC:.+]]
+ //
+ // CHECK: [[EXEC]]
+ // CHECK-NOT: call void @__kmpc_push_proc_bind
+ // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+ // CHECK: br label {{%?}}[[DONE:.+]]
+ //
+ // CHECK: [[DONE]]
+ // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[EXIT]]
+ // CHECK: ret void
+ // CHECK: }
+
+
+
+
+ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}(
+ // CHECK: call void @__kmpc_spmd_kernel_init(
+ // CHECK: br label {{%?}}[[EXEC:.+]]
+ //
+ // CHECK: [[EXEC]]
+ // CHECK-NOT: call void @__kmpc_push_proc_bind
+ // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null
+ // CHECK: br label {{%?}}[[DONE:.+]]
+ //
+ // CHECK: [[DONE]]
+ // CHECK: call void @__kmpc_spmd_kernel_deinit()
+ // CHECK: br label {{%?}}[[EXIT:.+]]
+ //
+ // CHECK: [[EXIT]]
+ // CHECK: ret void
+ // CHECK: }
+#endif
OpenPOWER on IntegriCloud