diff options
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 58 |
1 files changed, 47 insertions, 11 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 22e1e48a225..82be31f0f80 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -61,6 +61,12 @@ enum OpenMPRTLFunctionNVPTX { /// lane_offset, int16_t shortCircuit), /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); OMPRTL_NVPTX__kmpc_parallel_reduce_nowait, + /// \brief Call to __kmpc_nvptx_simd_reduce_nowait(kmp_int32 + /// global_tid, kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + /// void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + /// lane_offset, int16_t shortCircuit), + /// void (*kmp_InterWarpCopyFctPtr)(void* src, int32_t warp_num)); + OMPRTL_NVPTX__kmpc_simd_reduce_nowait, /// \brief Call to __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, /// int32_t num_vars, size_t reduce_size, void *reduce_data, /// void (*kmp_ShuffleReductFctPtr)(void *rhs, int16_t lane_id, int16_t @@ -1028,6 +1034,33 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { FnTy, /*Name=*/"__kmpc_nvptx_parallel_reduce_nowait"); break; } + case OMPRTL_NVPTX__kmpc_simd_reduce_nowait: { + // Build int32_t kmpc_nvptx_simd_reduce_nowait(kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void* reduce_data, + // void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id, int16_t + // lane_offset, int16_t Algorithm Version), + // void (*kmp_InterWarpCopyFctPtr)(void* src, int warp_num)); + llvm::Type *ShuffleReduceTypeParams[] = {CGM.VoidPtrTy, CGM.Int16Ty, + CGM.Int16Ty, CGM.Int16Ty}; + auto *ShuffleReduceFnTy = + llvm::FunctionType::get(CGM.VoidTy, ShuffleReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *InterWarpCopyTypeParams[] = {CGM.VoidPtrTy, CGM.Int32Ty}; + auto *InterWarpCopyFnTy = + llvm::FunctionType::get(CGM.VoidTy, InterWarpCopyTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.SizeTy, + CGM.VoidPtrTy, + ShuffleReduceFnTy->getPointerTo(), + InterWarpCopyFnTy->getPointerTo()}; + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction( + FnTy, /*Name=*/"__kmpc_nvptx_simd_reduce_nowait"); + break; + } case OMPRTL_NVPTX__kmpc_teams_reduce_nowait: { // Build int32_t __kmpc_nvptx_teams_reduce_nowait(int32_t global_tid, // int32_t num_vars, size_t reduce_size, void *reduce_data, @@ -2703,8 +2736,8 @@ void CGOpenMPRuntimeNVPTX::emitReduction( bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - // FIXME: Add support for simd reduction. - assert((TeamsReduction || ParallelReduction) && + bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind); + assert((TeamsReduction || ParallelReduction || SimdReduction) && "Invalid reduction selection in emitReduction."); ASTContext &C = CGM.getContext(); @@ -2764,19 +2797,22 @@ void CGOpenMPRuntimeNVPTX::emitReduction( llvm::Value *InterWarpCopyFn = emitInterWarpCopyFunction(CGM, Privates, ReductionArrayTy, Loc); - llvm::Value *Res = nullptr; - if (ParallelReduction) { - llvm::Value *Args[] = {ThreadId, - CGF.Builder.getInt32(RHSExprs.size()), - ReductionArrayTySize, - RL, - ShuffleAndReduceFn, - InterWarpCopyFn}; + llvm::Value *Args[] = {ThreadId, + CGF.Builder.getInt32(RHSExprs.size()), + ReductionArrayTySize, + RL, + ShuffleAndReduceFn, + InterWarpCopyFn}; + llvm::Value *Res = nullptr; + if (ParallelReduction) Res = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_reduce_nowait), Args); - } + else if (SimdReduction) + Res = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_simd_reduce_nowait), + Args); if (TeamsReduction) { llvm::Value *ScratchPadCopyFn = |