summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 7
-rw-r--r-- clang/test/OpenMP/nvptx_parallel_codegen.cpp | 8
2 files changed, 9 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 48dcbbf3cab..8aa03deee7f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -2669,8 +2669,9 @@ void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
llvm::ConstantPointerNull::get(
cast<llvm::PointerType>(getIdentTyPointerTy())),
llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
- CGF.EmitRuntimeCall(
+ llvm::CallInst *Call = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+ Call->setConvergent();
}
void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
@@ -2684,7 +2685,9 @@ void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
unsigned Flags = getDefaultFlagsForBarriers(Kind);
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
getThreadID(CGF, Loc)};
- CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+ llvm::CallInst *Call = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args);
+ Call->setConvergent();
}
void CGOpenMPRuntimeNVPTX::emitCriticalRegion(
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index cdbc8872445..155ffc5100e 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -88,7 +88,7 @@ int bar(int n){
// CHECK: br label {{%?}}[[AWAIT_WORK:.+]]
//
// CHECK: [[AWAIT_WORK]]
-// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
+// CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#BARRIER_ATTRS:]]
// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]
// CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8
// store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1
@@ -318,10 +318,10 @@ int bar(int n){
// CHECK: define internal void [[PARALLEL_FN4]](
// CHECK: [[A:%.+]] = alloca i[[SZ:32|64]],
// CHECK: store i[[SZ]] 45, i[[SZ]]* %a,
-// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) #[[#BARRIER_ATTRS]]
// CHECK: ret void
-// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[BARRIER_ATTRS:.+]]
+// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[#BARRIER_ATTRS]]
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}_worker()
// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}(
@@ -367,6 +367,6 @@ int bar(int n){
// CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]],
// CHECK: br label
-// CHECK: attributes #[[BARRIER_ATTRS]] = {{.*}} convergent {{.*}}
+// CHECK: attributes #[[#BARRIER_ATTRS]] = {{.*}} convergent {{.*}}
#endif
OpenPOWER on IntegriCloud