Diffstat (limited to 'final/runtime/src/z_Linux_asm.S')
-rw-r--r-- | final/runtime/src/z_Linux_asm.S | 1730
1 file changed, 1730 insertions(+), 0 deletions(-)
diff --git a/final/runtime/src/z_Linux_asm.S b/final/runtime/src/z_Linux_asm.S new file mode 100644 index 0000000..c9fbc23 --- /dev/null +++ b/final/runtime/src/z_Linux_asm.S @@ -0,0 +1,1730 @@ +// z_Linux_asm.S: - microtasking routines specifically +// written for Intel platforms running Linux* OS + +// +////===----------------------------------------------------------------------===// +//// +//// The LLVM Compiler Infrastructure +//// +//// This file is dual licensed under the MIT and the University of Illinois Open +//// Source Licenses. See LICENSE.txt for details. +//// +////===----------------------------------------------------------------------===// +// + +// ----------------------------------------------------------------------- +// macros +// ----------------------------------------------------------------------- + +#include "kmp_config.h" + +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + +# if KMP_MIC +// the 'delay r16/r32/r64' should be used instead of the 'pause'. +// The delay operation has the effect of removing the current thread from +// the round-robin HT mechanism, and therefore speeds up the issue rate of +// the other threads on the same core. +// +// A value of 0 works fine for <= 2 threads per core, but causes the EPCC +// barrier time to increase greatly for 3 or more threads per core. +// +// A value of 100 works pretty well for up to 4 threads per core, but isn't +// quite as fast as 0 for 2 threads per core. +// +// We need to check what happens for oversubscription / > 4 threads per core. +// It is possible that we need to pass the delay value in as a parameter +// that the caller determines based on the total # threads / # cores. +// +//.macro pause_op +// mov $100, %rax +// delay %rax +//.endm +# else +# define pause_op .byte 0xf3,0x90 +# endif // KMP_MIC + +# if KMP_OS_DARWIN +# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols +# define KMP_LABEL(x) L_##x // form the name of label +.macro KMP_CFI_DEF_OFFSET +.endmacro +.macro KMP_CFI_OFFSET +.endmacro +.macro KMP_CFI_REGISTER +.endmacro +.macro KMP_CFI_DEF +.endmacro +.macro ALIGN + .align $0 +.endmacro +.macro DEBUG_INFO +/* Not sure what .size does in icc, not sure if we need to do something + similar for OS X*. 
+*/ +.endmacro +.macro PROC + ALIGN 4 + .globl KMP_PREFIX_UNDERSCORE($0) +KMP_PREFIX_UNDERSCORE($0): +.endmacro +# else // KMP_OS_DARWIN +# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols +// Format labels so that they don't override function names in gdb's backtraces +// MIC assembler doesn't accept .L syntax, the L works fine there (as well as +// on OS X*) +# if KMP_MIC +# define KMP_LABEL(x) L_##x // local label +# else +# define KMP_LABEL(x) .L_##x // local label hidden from backtraces +# endif // KMP_MIC +.macro ALIGN size + .align 1<<(\size) +.endm +.macro DEBUG_INFO proc + .cfi_endproc +// Not sure why we need .type and .size for the functions + .align 16 + .type \proc,@function + .size \proc,.-\proc +.endm +.macro PROC proc + ALIGN 4 + .globl KMP_PREFIX_UNDERSCORE(\proc) +KMP_PREFIX_UNDERSCORE(\proc): + .cfi_startproc +.endm +.macro KMP_CFI_DEF_OFFSET sz + .cfi_def_cfa_offset \sz +.endm +.macro KMP_CFI_OFFSET reg, sz + .cfi_offset \reg,\sz +.endm +.macro KMP_CFI_REGISTER reg + .cfi_def_cfa_register \reg +.endm +.macro KMP_CFI_DEF reg, sz + .cfi_def_cfa \reg,\sz +.endm +# endif // KMP_OS_DARWIN +#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 + +#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 + +# if KMP_OS_DARWIN +# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols +# define KMP_LABEL(x) L_##x // form the name of label + +.macro ALIGN + .align $0 +.endmacro + +.macro DEBUG_INFO +/* Not sure what .size does in icc, not sure if we need to do something + similar for OS X*. +*/ +.endmacro + +.macro PROC + ALIGN 4 + .globl KMP_PREFIX_UNDERSCORE($0) +KMP_PREFIX_UNDERSCORE($0): +.endmacro +# else // KMP_OS_DARWIN +# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols +// Format labels so that they don't override function names in gdb's backtraces +# define KMP_LABEL(x) .L_##x // local label hidden from backtraces + +.macro ALIGN size + .align 1<<(\size) +.endm + +.macro DEBUG_INFO proc + .cfi_endproc +// Not sure why we need .type and .size for the functions + ALIGN 2 + .type \proc,@function + .size \proc,.-\proc +.endm + +.macro PROC proc + ALIGN 2 + .globl KMP_PREFIX_UNDERSCORE(\proc) +KMP_PREFIX_UNDERSCORE(\proc): + .cfi_startproc +.endm +# endif // KMP_OS_DARWIN + +#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 + +// ----------------------------------------------------------------------- +// data +// ----------------------------------------------------------------------- + +#ifdef KMP_GOMP_COMPAT + +// Support for unnamed common blocks. +// +// Because the symbol ".gomp_critical_user_" contains a ".", we have to +// put this stuff in assembly. 
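+// Illustration only (not part of the original file): in C one could declare +// extern char gomp_critical_user_[32]; // legal C, but the wrong symbol +// but not +// extern char .gomp_critical_user_[32]; // the dot is not valid in C +// so C code reaches the 32-byte block through the pointer defined below, +// via a declaration along the lines of +// extern void *__kmp_unnamed_critical_addr; // type abbreviated here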
+ +# if KMP_ARCH_X86 +# if KMP_OS_DARWIN + .data + .comm .gomp_critical_user_,32 + .data + .globl ___kmp_unnamed_critical_addr +___kmp_unnamed_critical_addr: + .long .gomp_critical_user_ +# else /* Linux* OS */ + .data + .comm .gomp_critical_user_,32,8 + .data + ALIGN 4 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .4byte .gomp_critical_user_ + .type __kmp_unnamed_critical_addr,@object + .size __kmp_unnamed_critical_addr,4 +# endif /* KMP_OS_DARWIN */ +# endif /* KMP_ARCH_X86 */ + +# if KMP_ARCH_X86_64 +# if KMP_OS_DARWIN + .data + .comm .gomp_critical_user_,32 + .data + .globl ___kmp_unnamed_critical_addr +___kmp_unnamed_critical_addr: + .quad .gomp_critical_user_ +# else /* Linux* OS */ + .data + .comm .gomp_critical_user_,32,8 + .data + ALIGN 8 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .8byte .gomp_critical_user_ + .type __kmp_unnamed_critical_addr,@object + .size __kmp_unnamed_critical_addr,8 +# endif /* KMP_OS_DARWIN */ +# endif /* KMP_ARCH_X86_64 */ + +#endif /* KMP_GOMP_COMPAT */ + + +#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 + +// ----------------------------------------------------------------------- +// microtasking routines specifically written for IA-32 architecture +// running Linux* OS +// ----------------------------------------------------------------------- + + .ident "Intel Corporation" + .data + ALIGN 4 +// void +// __kmp_x86_pause( void ); + + .text + PROC __kmp_x86_pause + + pause_op + ret + + DEBUG_INFO __kmp_x86_pause + +// void +// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); + + PROC __kmp_x86_cpuid + + pushl %ebp + movl %esp,%ebp + pushl %edi + pushl %ebx + pushl %ecx + pushl %edx + + movl 8(%ebp), %eax + movl 12(%ebp), %ecx + cpuid // Query the CPUID for the current processor + + movl 16(%ebp), %edi + movl %eax, 0(%edi) + movl %ebx, 4(%edi) + movl %ecx, 8(%edi) + movl %edx, 12(%edi) + + popl %edx + popl %ecx + popl %ebx + popl %edi + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_x86_cpuid + + +# if !KMP_ASM_INTRINS + +//------------------------------------------------------------------------ +// kmp_int32 +// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); + + PROC __kmp_test_then_add32 + + movl 4(%esp), %ecx + movl 8(%esp), %eax + lock + xaddl %eax,(%ecx) + ret + + DEBUG_INFO __kmp_test_then_add32 + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_fixed8 +// +// kmp_int32 +// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); +// +// parameters: +// p: 4(%esp) +// d: 8(%esp) +// +// return: %al + PROC __kmp_xchg_fixed8 + + movl 4(%esp), %ecx // "p" + movb 8(%esp), %al // "d" + + lock + xchgb %al,(%ecx) + ret + + DEBUG_INFO __kmp_xchg_fixed8 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_fixed16 +// +// kmp_int16 +// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); +// +// parameters: +// p: 4(%esp) +// d: 8(%esp) +// return: %ax + PROC __kmp_xchg_fixed16 + + movl 4(%esp), %ecx // "p" + movw 8(%esp), %ax // "d" + + lock + xchgw %ax,(%ecx) + ret + + DEBUG_INFO __kmp_xchg_fixed16 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_fixed32 +// +// kmp_int32 +// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); +// +// parameters: +// p: 4(%esp) +// d: 8(%esp) +// +// return: %eax + PROC __kmp_xchg_fixed32 + + movl 4(%esp), %ecx // "p" + movl 8(%esp), %eax // "d" + + lock + xchgl %eax,(%ecx) + ret + + 
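+// (Reader's note, an illustrative aside: these routines are only assembled +// when KMP_ASM_INTRINS is off; otherwise the runtime is assumed to use +// compiler intrinsics with the same contracts, roughly +// __kmp_test_then_add32(p, d) ~ __sync_fetch_and_add(p, d) +// __kmp_xchg_fixed32(p, d) ~ __sync_lock_test_and_set(p, d) +// each returning the previous value. The explicit lock prefix on xchg is +// redundant, since xchg with a memory operand is implicitly locked, but it +// is harmless.)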
DEBUG_INFO __kmp_xchg_fixed32 + + +// kmp_int8 +// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); + PROC __kmp_compare_and_store8 + + movl 4(%esp), %ecx + movb 8(%esp), %al + movb 12(%esp), %dl + lock + cmpxchgb %dl,(%ecx) + sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + ret + + DEBUG_INFO __kmp_compare_and_store8 + +// kmp_int16 +// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); + PROC __kmp_compare_and_store16 + + movl 4(%esp), %ecx + movw 8(%esp), %ax + movw 12(%esp), %dx + lock + cmpxchgw %dx,(%ecx) + sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + ret + + DEBUG_INFO __kmp_compare_and_store16 + +// kmp_int32 +// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); + PROC __kmp_compare_and_store32 + + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + lock + cmpxchgl %edx,(%ecx) + sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + ret + + DEBUG_INFO __kmp_compare_and_store32 + +// kmp_int32 +// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); + PROC __kmp_compare_and_store64 + + pushl %ebp + movl %esp, %ebp + pushl %ebx + pushl %edi + movl 8(%ebp), %edi + movl 12(%ebp), %eax // "cv" low order word + movl 16(%ebp), %edx // "cv" high order word + movl 20(%ebp), %ebx // "sv" low order word + movl 24(%ebp), %ecx // "sv" high order word + lock + cmpxchg8b (%edi) + sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction + popl %edi + popl %ebx + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_compare_and_store64 + +// kmp_int8 +// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); + PROC __kmp_compare_and_store_ret8 + + movl 4(%esp), %ecx + movb 8(%esp), %al + movb 12(%esp), %dl + lock + cmpxchgb %dl,(%ecx) + ret + + DEBUG_INFO __kmp_compare_and_store_ret8 + +// kmp_int16 +// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, +// kmp_int16 sv); + PROC __kmp_compare_and_store_ret16 + + movl 4(%esp), %ecx + movw 8(%esp), %ax + movw 12(%esp), %dx + lock + cmpxchgw %dx,(%ecx) + ret + + DEBUG_INFO __kmp_compare_and_store_ret16 + +// kmp_int32 +// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, +// kmp_int32 sv); + PROC __kmp_compare_and_store_ret32 + + movl 4(%esp), %ecx + movl 8(%esp), %eax + movl 12(%esp), %edx + lock + cmpxchgl %edx,(%ecx) + ret + + DEBUG_INFO __kmp_compare_and_store_ret32 + +// kmp_int64 +// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, +// kmp_int64 sv); + PROC __kmp_compare_and_store_ret64 + + pushl %ebp + movl %esp, %ebp + pushl %ebx + pushl %edi + movl 8(%ebp), %edi + movl 12(%ebp), %eax // "cv" low order word + movl 16(%ebp), %edx // "cv" high order word + movl 20(%ebp), %ebx // "sv" low order word + movl 24(%ebp), %ecx // "sv" high order word + lock + cmpxchg8b (%edi) + popl %edi + popl %ebx + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_compare_and_store_ret64 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_real32 +// +// kmp_real32 +// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); +// +// parameters: +// addr: 4(%esp) +// data: 8(%esp) +// +// return: %eax + PROC __kmp_xchg_real32 + + pushl %ebp + movl %esp, %ebp + subl $4, %esp + 
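+ // the 4 bytes just reserved at -4(%ebp) will hold the old value of *addr + // so it can be reloaded onto the x87 stack as the return value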
pushl %esi + + movl 8(%ebp), %esi + flds (%esi) + // load <addr> + fsts -4(%ebp) + // store old value + + movl 12(%ebp), %eax + + lock + xchgl %eax, (%esi) + + flds -4(%ebp) + // return old value + + popl %esi + movl %ebp, %esp + popl %ebp + ret + + DEBUG_INFO __kmp_xchg_real32 + +# endif /* !KMP_ASM_INTRINS */ + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_load_x87_fpu_control_word +// +// void +// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: 4(%esp) + PROC __kmp_load_x87_fpu_control_word + + movl 4(%esp), %eax + fldcw (%eax) + ret + + DEBUG_INFO __kmp_load_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_store_x87_fpu_control_word +// +// void +// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: 4(%esp) + PROC __kmp_store_x87_fpu_control_word + + movl 4(%esp), %eax + fstcw (%eax) + ret + + DEBUG_INFO __kmp_store_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_clear_x87_fpu_status_word +// +// void +// __kmp_clear_x87_fpu_status_word(); + PROC __kmp_clear_x87_fpu_status_word + + fnclex + ret + + DEBUG_INFO __kmp_clear_x87_fpu_status_word + + +//------------------------------------------------------------------------ +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int +// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// return 1; +// } + +// -- Begin __kmp_invoke_microtask +// mark_begin; + PROC __kmp_invoke_microtask + + pushl %ebp + KMP_CFI_DEF_OFFSET 8 + KMP_CFI_OFFSET ebp,-8 + movl %esp,%ebp // establish the base pointer for this routine. + KMP_CFI_REGISTER ebp + subl $8,%esp // allocate space for two local variables. + // These variables are: + // argv: -4(%ebp) + // temp: -8(%ebp) + // + pushl %ebx // save %ebx to use during this routine + // +#if OMPT_SUPPORT + movl 28(%ebp),%ebx // get exit_frame address + movl %ebp,(%ebx) // save exit_frame +#endif + + movl 20(%ebp),%ebx // Stack alignment - # args + addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) + shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 + movl %esp,%eax // + subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this + movl %eax,%ebx // Save to %ebx + andl $0xFFFFFF80,%eax // mask off 7 bits + subl %eax,%ebx // Amount to subtract from %esp + subl %ebx,%esp // Prepare the stack ptr -- + // now it will be aligned on 128-byte boundary at the call + + movl 24(%ebp),%eax // copy from p_argv[] + movl %eax,-4(%ebp) // into the local variable *argv. + + movl 20(%ebp),%ebx // argc is 20(%ebp) + shll $2,%ebx + +KMP_LABEL(invoke_2): + cmpl $0,%ebx + jg KMP_LABEL(invoke_4) + jmp KMP_LABEL(invoke_3) + ALIGN 2 +KMP_LABEL(invoke_4): + movl -4(%ebp),%eax + subl $4,%ebx // decrement argc. + addl %ebx,%eax // index into argv.
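+ // arguments are pushed from last to first, so after the loop p_argv[0] + // ends up lowest on the stack and the varargs appear in left-to-right order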
+ movl (%eax),%edx + pushl %edx + + jmp KMP_LABEL(invoke_2) + ALIGN 2 +KMP_LABEL(invoke_3): + leal 16(%ebp),%eax // push & tid + pushl %eax + + leal 12(%ebp),%eax // push & gtid + pushl %eax + + movl 8(%ebp),%ebx + call *%ebx // call (*pkfn)(); + + movl $1,%eax // return 1; + + movl -12(%ebp),%ebx // restore %ebx + leave + KMP_CFI_DEF esp,4 + ret + + DEBUG_INFO __kmp_invoke_microtask +// -- End __kmp_invoke_microtask + + +// kmp_uint64 +// __kmp_hardware_timestamp(void) + PROC __kmp_hardware_timestamp + rdtsc + ret + + DEBUG_INFO __kmp_hardware_timestamp +// -- End __kmp_hardware_timestamp + +#endif /* KMP_ARCH_X86 */ + + +#if KMP_ARCH_X86_64 + +// ----------------------------------------------------------------------- +// microtasking routines specifically written for IA-32 architecture and +// Intel(R) 64 running Linux* OS +// ----------------------------------------------------------------------- + +// -- Machine type P +// mark_description "Intel Corporation"; + .ident "Intel Corporation" +// -- .file "z_Linux_asm.S" + .data + ALIGN 4 + +// To prevent getting our code into .data section .text added to every routine +// definition for x86_64. +//------------------------------------------------------------------------ +// FUNCTION __kmp_x86_cpuid +// +// void +// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); +// +// parameters: +// mode: %edi +// mode2: %esi +// cpuid_buffer: %rdx + .text + PROC __kmp_x86_cpuid + + pushq %rbp + movq %rsp,%rbp + pushq %rbx // callee-save register + + movl %esi, %ecx // "mode2" + movl %edi, %eax // "mode" + movq %rdx, %rsi // cpuid_buffer + cpuid // Query the CPUID for the current processor + + movl %eax, 0(%rsi) // store results into buffer + movl %ebx, 4(%rsi) + movl %ecx, 8(%rsi) + movl %edx, 12(%rsi) + + popq %rbx // callee-save register + movq %rbp, %rsp + popq %rbp + ret + + DEBUG_INFO __kmp_x86_cpuid + + + +# if !KMP_ASM_INTRINS + +//------------------------------------------------------------------------ +// FUNCTION __kmp_test_then_add32 +// +// kmp_int32 +// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); +// +// parameters: +// p: %rdi +// d: %esi +// +// return: %eax + .text + PROC __kmp_test_then_add32 + + movl %esi, %eax // "d" + lock + xaddl %eax,(%rdi) + ret + + DEBUG_INFO __kmp_test_then_add32 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_test_then_add64 +// +// kmp_int64 +// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); +// +// parameters: +// p: %rdi +// d: %rsi +// return: %rax + .text + PROC __kmp_test_then_add64 + + movq %rsi, %rax // "d" + lock + xaddq %rax,(%rdi) + ret + + DEBUG_INFO __kmp_test_then_add64 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_fixed8 +// +// kmp_int32 +// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); +// +// parameters: +// p: %rdi +// d: %sil +// +// return: %al + .text + PROC __kmp_xchg_fixed8 + + movb %sil, %al // "d" + + lock + xchgb %al,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed8 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_fixed16 +// +// kmp_int16 +// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); +// +// parameters: +// p: %rdi +// d: %si +// return: %ax + .text + PROC __kmp_xchg_fixed16 + + movw %si, %ax // "d" + + lock + xchgw %ax,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed16 + + +//------------------------------------------------------------------------ +// FUNCTION 
__kmp_xchg_fixed32 +// +// kmp_int32 +// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); +// +// parameters: +// p: %rdi +// d: %esi +// +// return: %eax + .text + PROC __kmp_xchg_fixed32 + + movl %esi, %eax // "d" + + lock + xchgl %eax,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed32 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_fixed64 +// +// kmp_int64 +// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); +// +// parameters: +// p: %rdi +// d: %rsi +// return: %rax + .text + PROC __kmp_xchg_fixed64 + + movq %rsi, %rax // "d" + + lock + xchgq %rax,(%rdi) + ret + + DEBUG_INFO __kmp_xchg_fixed64 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store8 +// +// kmp_int8 +// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + .text + PROC __kmp_compare_and_store8 + + movb %sil, %al // "cv" + lock + cmpxchgb %dl,(%rdi) + sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store8 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store16 +// +// kmp_int16 +// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +// +// parameters: +// p: %rdi +// cv: %si +// sv: %dx +// +// return: %eax + .text + PROC __kmp_compare_and_store16 + + movw %si, %ax // "cv" + lock + cmpxchgw %dx,(%rdi) + sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store16 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store32 +// +// kmp_int32 +// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + .text + PROC __kmp_compare_and_store32 + + movl %esi, %eax // "cv" + lock + cmpxchgl %edx,(%rdi) + sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store32 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store64 +// +// kmp_int32 +// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +// +// parameters: +// p: %rdi +// cv: %rsi +// sv: %rdx +// return: %eax + .text + PROC __kmp_compare_and_store64 + + movq %rsi, %rax // "cv" + lock + cmpxchgq %rdx,(%rdi) + sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 + andq $1, %rax // sign extend previous instruction for return value + ret + + DEBUG_INFO __kmp_compare_and_store64 + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store_ret8 +// +// kmp_int8 +// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + .text + PROC __kmp_compare_and_store_ret8 + + movb %sil, %al // "cv" + lock + cmpxchgb %dl,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret8 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store_ret16 +// +// kmp_int16 +// 
__kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); +// +// parameters: +// p: %rdi +// cv: %si +// sv: %dx +// +// return: %eax + .text + PROC __kmp_compare_and_store_ret16 + + movw %si, %ax // "cv" + lock + cmpxchgw %dx,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret16 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store_ret32 +// +// kmp_int32 +// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); +// +// parameters: +// p: %rdi +// cv: %esi +// sv: %edx +// +// return: %eax + .text + PROC __kmp_compare_and_store_ret32 + + movl %esi, %eax // "cv" + lock + cmpxchgl %edx,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret32 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_compare_and_store_ret64 +// +// kmp_int64 +// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); +// +// parameters: +// p: %rdi +// cv: %rsi +// sv: %rdx +// return: %eax + .text + PROC __kmp_compare_and_store_ret64 + + movq %rsi, %rax // "cv" + lock + cmpxchgq %rdx,(%rdi) + ret + + DEBUG_INFO __kmp_compare_and_store_ret64 + +# endif /* !KMP_ASM_INTRINS */ + + +# if !KMP_MIC + +# if !KMP_ASM_INTRINS + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_real32 +// +// kmp_real32 +// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); +// +// parameters: +// addr: %rdi +// data: %xmm0 (lower 4 bytes) +// +// return: %xmm0 (lower 4 bytes) + .text + PROC __kmp_xchg_real32 + + movd %xmm0, %eax // load "data" to eax + + lock + xchgl %eax, (%rdi) + + movd %eax, %xmm0 // load old value into return register + + ret + + DEBUG_INFO __kmp_xchg_real32 + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_xchg_real64 +// +// kmp_real64 +// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); +// +// parameters: +// addr: %rdi +// data: %xmm0 (lower 8 bytes) +// return: %xmm0 (lower 8 bytes) + .text + PROC __kmp_xchg_real64 + + movd %xmm0, %rax // load "data" to rax + + lock + xchgq %rax, (%rdi) + + movd %rax, %xmm0 // load old value into return register + ret + + DEBUG_INFO __kmp_xchg_real64 + + +# endif /* !KMP_ASM_INTRINS */ + +# endif /* !KMP_MIC */ + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_load_x87_fpu_control_word +// +// void +// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: %rdi + .text + PROC __kmp_load_x87_fpu_control_word + + fldcw (%rdi) + ret + + DEBUG_INFO __kmp_load_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_store_x87_fpu_control_word +// +// void +// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +// +// parameters: +// p: %rdi + .text + PROC __kmp_store_x87_fpu_control_word + + fstcw (%rdi) + ret + + DEBUG_INFO __kmp_store_x87_fpu_control_word + + +//------------------------------------------------------------------------ +// FUNCTION __kmp_clear_x87_fpu_status_word +// +// void +// __kmp_clear_x87_fpu_status_word(); + .text + PROC __kmp_clear_x87_fpu_status_word + +#if KMP_MIC +// TODO: remove the workaround for problem with fnclex instruction (no CQ known) + fstenv -32(%rsp) // store FP env + andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW + fldenv -32(%rsp) // load FP env back + ret +#else + fnclex + ret
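+// (note: this fnclex path and the MIC fstenv/fldenv workaround above are +// equivalent: both clear bits 0-7 and bit 15 of the x87 status word)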
+#endif + + DEBUG_INFO __kmp_clear_x87_fpu_status_word + + +//------------------------------------------------------------------------ +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int +// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// return 1; +// } +// +// note: at the call to pkfn, %rsp must be 128-byte aligned for the compiler +// +// parameters: +// %rdi: pkfn +// %esi: gtid +// %edx: tid +// %ecx: argc +// %r8: p_argv +// %r9: &exit_frame +// +// locals: +// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn +// __tid: tid parm pushed on stack so can pass &tid to pkfn +// +// reg temps: +// %rax: used all over the place +// %rdx: used in stack pointer alignment calculation +// %r11: used to traverse p_argv array +// %rsi: used as temporary for stack parameters +// used as temporary for number of pkfn parms to push +// %rbx: used to hold pkfn address, and zero constant, callee-save +// +// return: %eax (always 1/TRUE) +__gtid = -16 +__tid = -24 + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + PROC __kmp_invoke_microtask + + pushq %rbp // save base pointer + KMP_CFI_DEF_OFFSET 16 + KMP_CFI_OFFSET rbp,-16 + movq %rsp,%rbp // establish the base pointer for this routine. + KMP_CFI_REGISTER rbp + +#if OMPT_SUPPORT + movq %rbp, (%r9) // save exit_frame +#endif + + pushq %rbx // %rbx is callee-saved register + pushq %rsi // Put gtid on stack so can pass &gtid to pkfn + pushq %rdx // Put tid on stack so can pass &tid to pkfn + + movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax + movq $0, %rbx // constant for cmovs later + subq $4, %rax // subtract four args passed in registers to pkfn +#if KMP_MIC + js KMP_LABEL(kmp_0) // jump to movq + jmp KMP_LABEL(kmp_0_exit) // jump ahead +KMP_LABEL(kmp_0): + movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) +KMP_LABEL(kmp_0_exit): +#else + cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) +#endif // KMP_MIC + + movq %rax, %rsi // save max(0, argc-4) -> %rsi for later + shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 + + movq %rsp, %rdx // + subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- + // without align, stack ptr would be this + movq %rdx, %rax // Save to %rax + + andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128-byte align) + subq %rax, %rdx // Amount to subtract from %rsp + subq %rdx, %rsp // Prepare the stack ptr -- + // now %rsp will align to 128-byte boundary at call site + + // setup pkfn parameter reg and stack + movq %rcx, %rax // argc -> %rax + cmpq $0, %rsi + je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push + shlq $3, %rcx // argc*8 -> %rcx + movq %r8, %rdx // p_argv -> %rdx + addq %rcx, %rdx // &p_argv[argc] -> %rdx + + movq %rsi, %rcx // max (0, argc-4) -> %rcx + +KMP_LABEL(kmp_invoke_push_parms): + // push nth - 7th parms to pkfn on stack + subq $8, %rdx // decrement p_argv pointer to previous parm + movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi + pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) + subl $1, %ecx + +// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e +// if the name of the label that is an operand of this jecxz starts with a dot ("."); +// Apple's linker does not support 1-byte length relocation; +// Resolution: replace all .labelX entries with L_labelX.
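+// In C terms, the alignment code above computes (a sketch, illustration only): +// need = 8 * max(0, argc - 4); // stack bytes for parms 7..argc +// rsp_at_call = (rsp - need) & ~(uintptr_t)127; // round down to 128 +// so that once the loop below has pushed the parms, %rsp is 128-byte aligned +// at the call, as the note at the top of the routine requires.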
+ + jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left + jmp KMP_LABEL(kmp_invoke_push_parms) + ALIGN 3 +KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. + // order here is important to avoid trashing + // registers used for both input and output parms! + movq %rdi, %rbx // pkfn -> %rbx + leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn) + leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) + + movq %r8, %r11 // p_argv -> %r11 + +#if KMP_MIC + cmpq $4, %rax // argc >= 4? + jns KMP_LABEL(kmp_4) // jump to movq + jmp KMP_LABEL(kmp_4_exit) // jump ahead +KMP_LABEL(kmp_4): + movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) +KMP_LABEL(kmp_4_exit): + + cmpq $3, %rax // argc >= 3? + jns KMP_LABEL(kmp_3) // jump to movq + jmp KMP_LABEL(kmp_3_exit) // jump ahead +KMP_LABEL(kmp_3): + movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) +KMP_LABEL(kmp_3_exit): + + cmpq $2, %rax // argc >= 2? + jns KMP_LABEL(kmp_2) // jump to movq + jmp KMP_LABEL(kmp_2_exit) // jump ahead +KMP_LABEL(kmp_2): + movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) +KMP_LABEL(kmp_2_exit): + + cmpq $1, %rax // argc >= 1? + jns KMP_LABEL(kmp_1) // jump to movq + jmp KMP_LABEL(kmp_1_exit) // jump ahead +KMP_LABEL(kmp_1): + movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) +KMP_LABEL(kmp_1_exit): +#else + cmpq $4, %rax // argc >= 4? + cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) + + cmpq $3, %rax // argc >= 3? + cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) + + cmpq $2, %rax // argc >= 2? + cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) + + cmpq $1, %rax // argc >= 1? + cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) +#endif // KMP_MIC + + call *%rbx // call (*pkfn)(); + movq $1, %rax // move 1 into return register; + + movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified + movq %rbp, %rsp // restore stack pointer + popq %rbp // restore frame pointer + KMP_CFI_DEF rsp,8 + ret + + DEBUG_INFO __kmp_invoke_microtask +// -- End __kmp_invoke_microtask + +// kmp_uint64 +// __kmp_hardware_timestamp(void) + .text + PROC __kmp_hardware_timestamp + rdtsc + shlq $32, %rdx + orq %rdx, %rax + ret + + DEBUG_INFO __kmp_hardware_timestamp +// -- End __kmp_hardware_timestamp + +//------------------------------------------------------------------------ +// FUNCTION __kmp_bsr32 +// +// int +// __kmp_bsr32( int ); + .text + PROC __kmp_bsr32 + + bsr %edi,%eax + ret + + DEBUG_INFO __kmp_bsr32 + + +// ----------------------------------------------------------------------- +#endif /* KMP_ARCH_X86_64 */ + +// ' +#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int +// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & tid, argv[0], ...
); +// return 1; +// } +// +// parameters: +// x0: pkfn +// w1: gtid +// w2: tid +// w3: argc +// x4: p_argv +// x5: &exit_frame +// +// locals: +// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn +// __tid: tid parm pushed on stack so can pass &tid to pkfn +// +// reg temps: +// x8: used to hold pkfn address +// w9: used as temporary for number of pkfn parms +// x10: used to traverse p_argv array +// x11: used as temporary for stack placement calculation +// x12: used as temporary for stack parameters +// x19: used to preserve exit_frame_ptr, callee-save +// +// return: w0 (always 1/TRUE) +// + +__gtid = 4 +__tid = 8 + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + PROC __kmp_invoke_microtask + + stp x29, x30, [sp, #-16]! +# if OMPT_SUPPORT + stp x19, x20, [sp, #-16]! +# endif + mov x29, sp + + orr w9, wzr, #1 + add w9, w9, w3, lsr #1 + sub sp, sp, w9, lsl #4 + mov x11, sp + + mov x8, x0 + str w1, [x29, #-__gtid] + str w2, [x29, #-__tid] + mov w9, w3 + mov x10, x4 +# if OMPT_SUPPORT + mov x19, x5 + str x29, [x19] +# endif + + sub x0, x29, #__gtid + sub x1, x29, #__tid + + cbz w9, KMP_LABEL(kmp_1) + ldr x2, [x10] + + sub w9, w9, #1 + cbz w9, KMP_LABEL(kmp_1) + ldr x3, [x10, #8]! + + sub w9, w9, #1 + cbz w9, KMP_LABEL(kmp_1) + ldr x4, [x10, #8]! + + sub w9, w9, #1 + cbz w9, KMP_LABEL(kmp_1) + ldr x5, [x10, #8]! + + sub w9, w9, #1 + cbz w9, KMP_LABEL(kmp_1) + ldr x6, [x10, #8]! + + sub w9, w9, #1 + cbz w9, KMP_LABEL(kmp_1) + ldr x7, [x10, #8]! + +KMP_LABEL(kmp_0): + sub w9, w9, #1 + cbz w9, KMP_LABEL(kmp_1) + ldr x12, [x10, #8]! + str x12, [x11], #8 + b KMP_LABEL(kmp_0) +KMP_LABEL(kmp_1): + blr x8 + orr w0, wzr, #1 + mov sp, x29 +# if OMPT_SUPPORT + str xzr, [x19] + ldp x19, x20, [sp], #16 +# endif + ldp x29, x30, [sp], #16 + ret + + DEBUG_INFO __kmp_invoke_microtask +// -- End __kmp_invoke_microtask + +#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ + +#if KMP_ARCH_PPC64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)( int *gtid, int *tid, ... ); +// +// int +// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] ) { +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// return 1; +// } +// +// parameters: +// r3: pkfn +// r4: gtid +// r5: tid +// r6: argc +// r7: p_argv +// r8: &exit_frame +// +// return: r3 (always 1/TRUE) +// + .text +# if KMP_ARCH_PPC64_LE + .abiversion 2 +# endif + .globl __kmp_invoke_microtask + +# if KMP_ARCH_PPC64_LE + .p2align 4 +# else + .p2align 2 +# endif + + .type __kmp_invoke_microtask,@function + +# if KMP_ARCH_PPC64_LE +__kmp_invoke_microtask: +.Lfunc_begin0: +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 +# else + .section .opd,"aw",@progbits +__kmp_invoke_microtask: + .p2align 3 + .quad .Lfunc_begin0 + .quad .TOC.@tocbase + .quad 0 + .text +.Lfunc_begin0: +# endif + +// -- Begin __kmp_invoke_microtask +// mark_begin; + +// We need to allocate a stack frame large enough to hold all of the parameters +// on the stack for the microtask plus what this function needs. That's 48 +// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the +// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, +// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes +// to save r30 to hold a copy of r8.
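+// Worked example (an illustration, assuming ELFv2 with OMP-T and argc == 3): +// 32 + 8*(2+3) + 8 + 8 + 8 == 96 bytes, which matches the li 12, 72 below +// plus 8*argc == 24, and is already a multiple of 16, so the alignment step +// changes nothing.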
+ .cfi_startproc + mflr 0 + std 31, -8(1) + std 0, 16(1) + +// This is unusual because normally we'd set r31 equal to r1 after the stack +// frame is established. In this case, however, we need to dynamically compute +// the stack frame size, and so we keep a direct copy of r1 to access our +// register save areas and restore the r1 value before returning. + mr 31, 1 + .cfi_def_cfa_register r31 + .cfi_offset r31, -8 + .cfi_offset lr, 16 + +// Compute the size necessary for the local stack frame. +# if KMP_ARCH_PPC64_LE + li 12, 72 +# else + li 12, 88 +# endif + sldi 0, 6, 3 + add 12, 0, 12 + neg 12, 12 + +// We need to make sure that the stack frame stays aligned (to 16 bytes, except +// under the BG/Q CNK, where it must be to 32 bytes). +# if KMP_OS_CNK + li 0, -32 +# else + li 0, -16 +# endif + and 12, 0, 12 + +// Establish the local stack frame. + stdux 1, 1, 12 + +# if OMPT_SUPPORT + .cfi_offset r30, -16 + std 30, -16(31) + std 1, 0(8) + mr 30, 8 +# endif + +// Store gtid and tid to the stack because they're passed by reference to the microtask. + stw 4, -20(31) + stw 5, -24(31) + + mr 12, 6 + mr 4, 7 + + cmpwi 0, 12, 1 + blt 0, .Lcall + + ld 5, 0(4) + + cmpwi 0, 12, 2 + blt 0, .Lcall + + ld 6, 8(4) + + cmpwi 0, 12, 3 + blt 0, .Lcall + + ld 7, 16(4) + + cmpwi 0, 12, 4 + blt 0, .Lcall + + ld 8, 24(4) + + cmpwi 0, 12, 5 + blt 0, .Lcall + + ld 9, 32(4) + + cmpwi 0, 12, 6 + blt 0, .Lcall + + ld 10, 40(4) + + cmpwi 0, 12, 7 + blt 0, .Lcall + +// There are more than 6 microtask parameters, so we need to store the +// remainder to the stack. + addi 12, 12, -6 + mtctr 12 + +// These are set to 8 bytes before the first desired store address (we're using +// pre-increment loads and stores in the loop below). The parameter save area +// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and +// 32 + 8*8 == 96 bytes above r1 for ELFv2. + addi 4, 4, 40 +# if KMP_ARCH_PPC64_LE + addi 12, 1, 88 +# else + addi 12, 1, 104 +# endif + +.Lnext: + ldu 0, 8(4) + stdu 0, 8(12) + bdnz .Lnext + +.Lcall: +# if KMP_ARCH_PPC64_LE + std 2, 24(1) + mr 12, 3 +# else + std 2, 40(1) +// For ELFv1, we need to load the actual function address from the function descriptor. + ld 12, 0(3) + ld 2, 8(3) + ld 11, 16(3) +# endif + + addi 3, 31, -20 + addi 4, 31, -24 + + mtctr 12 + bctrl +# if KMP_ARCH_PPC64_LE + ld 2, 24(1) +# else + ld 2, 40(1) +# endif + +# if OMPT_SUPPORT + li 3, 0 + std 3, 0(30) +# endif + + li 3, 1 + +# if OMPT_SUPPORT + ld 30, -16(31) +# endif + + mr 1, 31 + ld 0, 16(1) + ld 31, -8(1) + mtlr 0 + blr + + .long 0 + .quad 0 +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_PPC64 */ + +#if KMP_ARCH_ARM || KMP_ARCH_MIPS + .data + .comm .gomp_critical_user_,32,8 + .data + .align 4 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .4byte .gomp_critical_user_ + .size __kmp_unnamed_critical_addr,4 +#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */ + +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 + .data + .comm .gomp_critical_user_,32,8 + .data + .align 8 + .global __kmp_unnamed_critical_addr +__kmp_unnamed_critical_addr: + .8byte .gomp_critical_user_ + .size __kmp_unnamed_critical_addr,8 +#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 */ + +#if KMP_OS_LINUX +# if KMP_ARCH_ARM +.section .note.GNU-stack,"",%progbits +# else +.section .note.GNU-stack,"",@progbits +# endif +#endif
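For reference, a minimal C model of the contract that every __kmp_invoke_microtask variant above implements (illustrative only: the prototype comes from the comments in this file, and the fixed-arity switch stands in for the arbitrary-argc dispatch the assembly performs):

    typedef void (*microtask_t)(int *gtid, int *tid, ...);

    static int invoke_microtask_model(microtask_t pkfn, int gtid, int tid,
                                      int argc, void *p_argv[]) {
        /* gtid and tid are passed by reference to copies on our stack */
        switch (argc) {
        case 0: (*pkfn)(&gtid, &tid); break;
        case 1: (*pkfn)(&gtid, &tid, p_argv[0]); break;
        case 2: (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); break;
        default: return 0; /* the assembly handles any argc; this sketch does not */
        }
        return 1; /* the real routines always return 1 (TRUE) */
    }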