diff options
-rw-r--r-- | openmp/runtime/src/z_Linux_asm.s | 220 | ||||
-rw-r--r-- | openmp/runtime/src/z_Linux_util.c | 2 |
2 files changed, 221 insertions, 1 deletions
diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s index 5ed8e8809fc..128379ee8b0 100644 --- a/openmp/runtime/src/z_Linux_asm.s +++ b/openmp/runtime/src/z_Linux_asm.s @@ -1555,6 +1555,226 @@ KMP_LABEL(kmp_1): #endif /* KMP_OS_LINUX && KMP_ARCH_AARCH64 */ +#if KMP_ARCH_PPC64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int +// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & tid, p_argv[0], ... ); +// return 1; +// } +// +// parameters: +// r3: pkfn +// r4: gtid +// r5: tid +// r6: argc +// r7: p_argv +// r8: &exit_frame (read only when OMPT_SUPPORT is enabled) +// +// return: r3 (always 1/TRUE) +// + .text +# if KMP_ARCH_PPC64_LE + .abiversion 2 +# endif + .globl __kmp_invoke_microtask + +# if KMP_ARCH_PPC64_LE + .p2align 4 +# else + .p2align 2 +# endif + + .type __kmp_invoke_microtask,@function + +# if KMP_ARCH_PPC64_LE +__kmp_invoke_microtask: +.Lfunc_begin0: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 +# else + .section .opd,"aw",@progbits +__kmp_invoke_microtask: + .p2align 3 + .quad .Lfunc_begin0 + .quad .TOC.@tocbase + .quad 0 + .text +.Lfunc_begin0: +# endif + +// -- Begin __kmp_invoke_microtask +// mark_begin; + +// We need to allocate a stack frame large enough to hold all of the parameters +// on the stack for the microtask plus what this function needs. That's 48 +// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the +// parameters to the microtask, plus 8 bytes to store the values of r4 and r5 (gtid and tid, 4 bytes each), +// and 8 bytes to store r31. With OMPT support, we need an additional 8 bytes +// to save r30 to hold a copy of r8. 
+ + .cfi_startproc + mflr 0 + std 31, -8(1) + std 0, 16(1) + +// This is unusual because normally we'd set r31 equal to r1 after the stack +// frame is established. In this case, however, we need to dynamically compute +// the stack frame size, and so we keep a direct copy of r1 to access our +// register save areas and restore the r1 value before returning. + mr 31, 1 + .cfi_def_cfa_register r31 + .cfi_offset r31, -8 + .cfi_offset lr, 16 + +// Compute the size necessary for the local stack frame: a fixed part (72 bytes for ELFv2, 88 for ELFv1; both always include the 8-byte r30 slot) plus 8*argc, negated for use as a stack-pointer delta. +# if KMP_ARCH_PPC64_LE + li 12, 72 +# else + li 12, 88 +# endif + sldi 0, 6, 3 + add 12, 0, 12 + neg 12, 12 + +// We need to make sure that the stack frame stays aligned (to 16 bytes, except +// under the BG/Q CNK, where it must be to 32 bytes). +# if KMP_OS_CNK + li 0, -32 +# else + li 0, -16 +# endif + and 12, 0, 12 + +// Establish the local stack frame (stdux stores the old r1 as the back chain at the new r1 and updates r1 in one step). + stdux 1, 1, 12 + +# if OMPT_SUPPORT + .cfi_offset r30, -16 + std 30, -16(31) + mr 30, 8 +# endif + +// Store gtid and tid to the stack because they're passed by reference to the microtask. + stw 4, -20(31) + stw 5, -24(31) + + mr 12, 6 + mr 4, 7 + + cmpwi 0, 12, 1 + blt 0, .Lcall + + ld 5, 0(4) + + cmpwi 0, 12, 2 + blt 0, .Lcall + + ld 6, 8(4) + + cmpwi 0, 12, 3 + blt 0, .Lcall + + ld 7, 16(4) + + cmpwi 0, 12, 4 + blt 0, .Lcall + + ld 8, 24(4) + + cmpwi 0, 12, 5 + blt 0, .Lcall + + ld 9, 32(4) + + cmpwi 0, 12, 6 + blt 0, .Lcall + + ld 10, 40(4) + + cmpwi 0, 12, 7 + blt 0, .Lcall + +// There are more than 6 microtask parameters, so we need to store the +// remainder to the stack. + addi 12, 12, -6 + mtctr 12 + +// These are set to 8 bytes before the first desired store address (we're using +// pre-increment loads and stores in the loop below). The stack-passed portion of the parameter save area +// for the microtask (past the eight register slots) begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and +// 32 + 8*8 == 96 bytes above r1 for ELFv2. 
+ addi 4, 4, 40 +# if KMP_ARCH_PPC64_LE + addi 12, 1, 88 +# else + addi 12, 1, 104 +# endif + +.Lnext: + ldu 0, 8(4) + stdu 0, 8(12) + bdnz .Lnext + +.Lcall: +# if KMP_ARCH_PPC64_LE + std 2, 24(1) + mr 12, 3 +#else + std 2, 40(1) +// For ELFv1, we need to load the actual function address from the function descriptor, along with the callee's TOC pointer (into r2) and the descriptor's third doubleword (into r11). + ld 12, 0(3) + ld 2, 8(3) + ld 11, 16(3) +#endif + + addi 3, 31, -20 + addi 4, 31, -24 + + mtctr 12 + bctrl +# if KMP_ARCH_PPC64_LE + ld 2, 24(1) +# else + ld 2, 40(1) +# endif + +# if OMPT_SUPPORT + li 3, 0 + std 3, 0(30) +# endif + + li 3, 1 + +# if OMPT_SUPPORT + ld 30, -16(31) +# endif + + mr 1, 31 + ld 0, 16(1) + ld 31, -8(1) + mtlr 0 + blr + + .long 0 + .quad 0 +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_PPC64 */ + #if KMP_ARCH_ARM .data .comm .gomp_critical_user_,32,8 diff --git a/openmp/runtime/src/z_Linux_util.c b/openmp/runtime/src/z_Linux_util.c index 126509d6502..4543c55c84d 100644 --- a/openmp/runtime/src/z_Linux_util.c +++ b/openmp/runtime/src/z_Linux_util.c @@ -2575,7 +2575,7 @@ __kmp_get_load_balance( int max ) #endif // USE_LOAD_BALANCE -#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64)) +#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || (KMP_OS_LINUX && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function |