diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2018-12-18 19:20:15 +0000 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-12-18 19:20:15 +0000 |
| commit | 29d47fcb3063a6f743a474ea5ecd961d6c86c258 (patch) | |
| tree | 13ddc51ed114e3696f4edbf0b815f84c890090ae | |
| parent | 41031d97f215bdea5d9f75653dd93609d76b5650 (diff) | |
| download | bcm5719-llvm-29d47fcb3063a6f743a474ea5ecd961d6c86c258.tar.gz bcm5719-llvm-29d47fcb3063a6f743a474ea5ecd961d6c86c258.zip | |
[OPENMP][NVPTX] Added extra sync point to the inter-warp copy function.
The parallel reduction operation requires an extra synchronization point
in the inter-warp copy function to avoid divergence.
llvm-svn: 349525
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 5 | ||||
| -rw-r--r-- | clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp | 4 |
2 files changed, 9 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index b055132ef01..59086d219b0 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -3089,6 +3089,7 @@ static void emitReductionListCopy( /// void inter_warp_copy_func(void* reduce_data, num_warps) /// shared smem[warp_size]; /// For all data entries D in reduce_data: +/// sync /// If (I am the first lane in each warp) /// Copy my local D to smem[warp_id] /// sync @@ -3203,6 +3204,10 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, Bld.CreateCondBr(Cmp, BodyBB, ExitBB); CGF.EmitBlock(BodyBB); } + // kmpc_barrier. + CGM.getOpenMPRuntime().emitBarrierCall(CGF, Loc, OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); llvm::BasicBlock *ThenBB = CGF.createBasicBlock("then"); llvm::BasicBlock *ElseBB = CGF.createBasicBlock("else"); llvm::BasicBlock *MergeBB = CGF.createBasicBlock("ifcont"); diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index dd93b0c1b9e..34ad93b695f 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -190,6 +190,7 @@ int bar(int n){ // CHECK: [[CNT:%.+]] = load i32, i32* [[CNT_ADDR]], // CHECK: [[DONE_COPY:%.+]] = icmp ult i32 [[CNT]], 2 // CHECK: br i1 [[DONE_COPY]], label + // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] // @@ -427,6 +428,7 @@ int bar(int n){ // CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 // CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 // CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* + // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 
[[LANEID]], 0 // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] // @@ -466,6 +468,7 @@ int bar(int n){ // // CHECK: [[READ_CONT]] // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ + // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] // @@ -740,6 +743,7 @@ int bar(int n){ // CHECK-DAG: [[LANEID:%.+]] = and i32 {{.+}}, 31 // CHECK-DAG: [[WARPID:%.+]] = ashr i32 {{.+}}, 5 // CHECK-DAG: [[RED_LIST:%.+]] = bitcast i8* {{.+}} to [[RLT]]* + // CHECK: call void @__kmpc_barrier(%struct.ident_t* @ // CHECK: [[IS_WARP_MASTER:%.+]] = icmp eq i32 [[LANEID]], 0 // CHECK: br i1 [[IS_WARP_MASTER]], label {{%?}}[[DO_COPY:.+]], label {{%?}}[[COPY_ELSE:.+]] // |

