More progress · overby.me/darling-nix@a76db62

+12 -2

src/duct/include/commpage.h

··· 7 7 extern uint8_t __commpage[]; 8 8 #endif 9 9 10 - #define _COMM_PAGE_BASE_ADDRESS (__commpage) 10 + //#ifndef __ASSEMBLER__ 11 + //#define _COMM_PAGE_BASE_ADDRESS (__commpage) 12 + //#else 13 + //#define _COMM_PAGE_BASE_ADDRESS ___commpage@GOTPCREL(%rip) 14 + //#endif 11 15 #define _COMM_PAGE_AREA_LENGTH ( 1 * 4096 ) /* reserved length of entire comm area */ 12 - //#define _COMM_PAGE_BASE_ADDRESS ( 0xffff0000 ) /* base address of allocated memory */ 16 + 17 + #ifdef __x86_64__ 18 + #define _COMM_PAGE_BASE_ADDRESS ( 0x7fffffe00000 ) /* base address of allocated memory */ 19 + #else 20 + #define _COMM_PAGE_BASE_ADDRESS ( 0xaaaa0000 ) /* base address of allocated memory */ 21 + #endif 22 + 13 23 #define _COMM_PAGE_START_ADDRESS ( _COMM_PAGE_BASE_ADDRESS ) /* address traditional commpage code starts on */ 14 24 #define _COMM_PAGE_AREA_USED ( 1 * 4096 ) /* this is the amt actually allocated */ 15 25 #define _COMM_PAGE_SIGS_OFFSET 0x8000 /* offset to routine signatures */

+1 -9

src/duct/src/CMakeLists.txt

··· 2 2 3 3 cmake_minimum_required(VERSION 2.4.0) 4 4 5 - if(CMAKE_SIZEOF_VOID_P EQUAL 4 OR CMAKE_INSTALL_LIBDIR STREQUAL "lib32") 6 - set(BITS 32) 7 - add_definitions(-DTARGET_CPU_X86=1) 8 - else(CMAKE_SIZEOF_VOID_P EQUAL 4 OR CMAKE_INSTALL_LIBDIR STREQUAL "lib32") 9 - set(BITS 64) 10 - add_definitions(-DTARGET_CPU_X86_64=1) 11 - endif(CMAKE_SIZEOF_VOID_P EQUAL 4 OR CMAKE_INSTALL_LIBDIR STREQUAL "lib32") 12 - 13 5 add_definitions(-DTARGET_OS_MAC=1) 14 6 add_definitions(-DHAVE_STDINT_H=1) 15 7 add_definitions(-D__APPLE__ -D__DYNAMIC__) 16 8 add_definitions(-D__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1080) 17 9 18 10 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -nostdinc -D__DARWIN_UNIX03 -fPIC -w") 19 - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -nostdlib") 11 + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -nostdlib -Wl,-flat_namespace -Wl,-undefined,suppress") 20 12 21 13 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/darling") 22 14 SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)

+7 -3

src/duct/src/commpage.c

··· 1 1 #include <commpage.h> 2 + #include <sys/mman.h> 2 3 3 - uint8_t __commpage[_COMM_PAGE_AREA_LENGTH]; 4 + //uint8_t __commpage[_COMM_PAGE_AREA_LENGTH]; 4 5 5 6 __attribute__((constructor)) 6 7 void _darling_initialize_commpage(void) 7 8 { 8 - *(_COMM_PAGE_NCPUS) = 1; 9 - *(_COMM_PAGE_VERSION) = 12; 9 + mmap(_COMM_PAGE_BASE_ADDRESS, _COMM_PAGE_AREA_LENGTH, PROT_READ|PROT_WRITE, 10 + MAP_ANON | MAP_PRIVATE, -1, 0); 11 + 12 + *((uint8_t*)_COMM_PAGE_NCPUS) = 1; 13 + *((uint8_t*)_COMM_PAGE_VERSION) = 12; 10 14 *((uint64_t*) _COMM_PAGE_MEMORY_SIZE) = 1024LL*1024LL*1024LL; 11 15 } 12 16

+2 -3

src/libc/CMakeLists.txt

··· 19 19 #add_definitions("'-D__weak_reference(sym,alias)=__asm__(\".weak \" \\\#alias );__asm__(\".equ \" \\\#alias \", \" \\\#sym)'") 20 20 #add_definitions("'-D__weak_reference(sym,alias)='") 21 21 add_definitions("'-D__warn_references(name,warning)='") 22 - add_definitions(-Ddladdr=__darwin_dladdr) 22 + #add_definitions(-Ddladdr=__darwin_dladdr) 23 23 add_definitions(-DBUILDING_LIBC) 24 24 25 25 # Hide warnings that get spammed ··· 35 35 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -nostdinc -fPIC -w ") 36 36 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/weak_reference.h") 37 37 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb -O0") # development flags 38 - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -nostdlib -Wl,--version-script=${DARLING_TOP_DIRECTORY}/darwin.map -Bsymbolic-functions") 39 - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--defsym=__darwin_pthread_self=pthread_self") 38 + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -nostdlib") 40 39 41 40 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/fbsdcompat") 42 41 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/pthreads")

+17 -61

src/libc/gen/platfunc.h

··· 1 - // Modified by Lubos Dolezel for Darling 2 - 3 1 /* 4 2 * Copyright (c) 2003-2010 Apple Inc. All rights reserved. 5 3 * ··· 40 38 // This is a shared macro which calls PLATFUNC_VARIANT_NAME which has different 41 39 // implementations in __ASSEMBLER__ and !__ASSEMBLER__ 42 40 #define PLATFUNC_DESCRIPTOR_NAME(name, variant) \ 43 - PLATFUNC_VARIANT_NAME(name, variant) 41 + PLATFUNC_VARIANT_NAME(platfunc_##name,variant) 44 42 45 43 #ifdef __ASSEMBLER__ 46 44 ··· 54 52 */ 55 53 #define MP_SPIN_TRIES 1000 56 54 57 - #ifndef DARLING 58 - 59 - #define PLATFUNC_VARIANT_NAME(name, variant) _ ## name ## $VARIANT$ ## variant 60 - 61 - #else 62 - 63 - #define PLATFUNC_VARIANT_NAME(name, variant) name ##$VARIANT$ ## variant 64 - 65 - #endif 55 + #define PLATFUNC_VARIANT_NAME(name, variant) _##name##$VARIANT$##variant 66 56 67 57 #if defined (__i386__) 68 58 #define PLATFUNC_DESCRIPTOR_FIELD_POINTER .long 69 - #define PLATFUNC_DESCRIPTOR_REFERENCE(name, variant) \ 70 - .long PLATFUNC_DESCRIPTOR_NAME(name, variant) 59 + #define PLATFUNC_DESCRIPTOR_REFERENCE(name,variant) \ 60 + .long PLATFUNC_DESCRIPTOR_NAME(name,variant) 71 61 #elif defined (__x86_64__) 72 62 #define PLATFUNC_DESCRIPTOR_FIELD_POINTER .quad 73 63 #define PLATFUNC_DESCRIPTOR_REFERENCE(name, variant) \ 74 - .quad PLATFUNC_DESCRIPTOR_NAME(name, variant) 64 + .quad PLATFUNC_DESCRIPTOR_NAME(name,variant) 75 65 #else 76 66 #error unsupported architecture 77 67 #endif ··· 79 69 #ifdef VARIANT_DYLD 80 70 81 71 #define PLATFUNC_FUNCTION_START_GENERIC(name, variant, codetype, alignment) \ 82 - PLATFUNC_FUNCTION_START(name, variant, codetype, alignment) \ 72 + PLATFUNC_FUNCTION_START(name,variant, codetype, alignment) \ 83 73 .globl _ ## name ;\ 84 74 _ ## name ## : 85 75 86 76 #define PLATFUNC_DESCRIPTOR(name, variant, must, cant) 87 77 88 - #elif defined(DARLING) 89 - 90 - #undef PLATFUNC_DESCRIPTOR_NAME 91 - #define PLATFUNC_DESCRIPTOR_NAME(name, variant) name##$VARIANT_DESC$##variant 92 - 93 - #define PLATFUNC_FUNCTION_START_GENERIC(name, variant, codetype, alignment) \ 94 - PLATFUNC_FUNCTION_START(name, variant, codetype, alignment) 95 - 96 - #define PLATFUNC_DESCRIPTOR(name, variant, must, cant) \ 97 - .globl PLATFUNC_DESCRIPTOR_NAME(name,variant) ;\ 98 - .hidden PLATFUNC_DESCRIPTOR_NAME(name,variant) ;\ 99 - PLATFUNC_DESCRIPTOR_NAME(name,variant) ## : ;\ 100 - PLATFUNC_DESCRIPTOR_FIELD_POINTER PLATFUNC_VARIANT_NAME(name,variant) ;\ 101 - .long must ;\ 102 - .long cant ;\ 103 - 104 78 #else /* VARIANT_DYLD */ 105 79 106 80 #define PLATFUNC_FUNCTION_START_GENERIC PLATFUNC_FUNCTION_START 107 81 108 82 #define PLATFUNC_DESCRIPTOR(name, variant, must, cant) \ 109 83 .const_data ;\ 110 - .private_extern PLATFUNC_DESCRIPTOR_NAME(name, variant) ;\ 111 - PLATFUNC_DESCRIPTOR_NAME(name, variant) ## : ;\ 112 - PLATFUNC_DESCRIPTOR_FIELD_POINTER PLATFUNC_VARIANT_NAME(name, variant) ;\ 84 + .private_extern PLATFUNC_DESCRIPTOR_NAME(name,variant) ;\ 85 + PLATFUNC_DESCRIPTOR_NAME(name,variant) ## : ;\ 86 + PLATFUNC_DESCRIPTOR_FIELD_POINTER PLATFUNC_VARIANT_NAME(name,variant) ;\ 113 87 .long must ;\ 114 88 .long cant ;\ 115 89 .text 116 90 117 91 #endif /* VARIANT_DYLD */ 118 92 119 - #ifdef DARLING 120 - 121 - #define PLATFUNC_FUNCTION_START(name, variant, codetype, alignment) \ 122 - .text ;\ 123 - .align (2<<alignment), 0x90 ;\ 124 - .globl PLATFUNC_VARIANT_NAME(name,variant) ## ;\ 125 - PLATFUNC_VARIANT_NAME(name,variant) ##: 126 - 127 - #else 128 - 129 93 #define PLATFUNC_FUNCTION_START(name, variant, codetype, alignment) \ 130 94 .text ;\ 131 95 .align alignment, 0x90 ;\ 132 - .private_extern PLATFUNC_VARIANT_NAME(name, variant) ;\ 133 - PLATFUNC_VARIANT_NAME(name, variant) ## : 134 - 135 - #endif 96 + .private_extern PLATFUNC_VARIANT_NAME(name,variant) ;\ 97 + PLATFUNC_VARIANT_NAME(name,variant) ## : 136 98 137 99 #else /* __ASSEMBLER__ */ 138 100 139 - #ifdef DARLING 140 - #undef PLATFUNC_DESCRIPTOR_NAME 141 - #define PLATFUNC_DESCRIPTOR_NAME(name, variant) name ## $VARIANT_DESC$ ## variant 142 - #endif 143 - 144 - #define PLATFUNC_VARIANT_NAME(name, variant) name ## $VARIANT$ ## variant 145 - 146 - #define PLATFUNC_DESCRIPTOR_PROTOTYPE(name, variant) extern const platfunc_descriptor PLATFUNC_DESCRIPTOR_NAME(name, variant); 147 - #define PLATFUNC_DESCRIPTOR_REFERENCE(name, variant) &PLATFUNC_DESCRIPTOR_NAME(name, variant) 101 + #define PLATFUNC_VARIANT_NAME(name, variant) name##$VARIANT$##variant 102 + #define PLATFUNC_DESCRIPTOR_PROTOTYPE(name, variant) extern const platfunc_descriptor PLATFUNC_DESCRIPTOR_NAME(name,variant); 103 + #define PLATFUNC_DESCRIPTOR_REFERENCE(name, variant) &PLATFUNC_DESCRIPTOR_NAME(name,variant) 148 104 149 105 #define PLATFUNC_DESCRIPTOR(name, variant, must, cant) \ 150 - extern void PLATFUNC_VARIANT_NAME(name, variant) (void); \ 151 - const platfunc_descriptor PLATFUNC_DESCRIPTOR_NAME(name, variant) = { \ 152 - .address = PLATFUNC_VARIANT_NAME(name, variant), \ 106 + extern void PLATFUNC_VARIANT_NAME(name,variant) (void); \ 107 + const platfunc_descriptor PLATFUNC_DESCRIPTOR_NAME(name,variant) = { \ 108 + .address = PLATFUNC_VARIANT_NAME(name,variant), \ 153 109 .musthave = must, \ 154 110 .canthave = cant \ 155 111 }

-1

src/libc/gen/stack_logging_disk.c

··· 732 732 */ 733 733 static ssize_t 734 734 robust_write(int fd, const void *buf, size_t nbyte) { 735 - __asm__(".symver errno, errno@DARWIN"); 736 735 extern int errno; 737 736 ssize_t written = write(fd, buf, nbyte); 738 737 if (written == -1 && errno == EBADF) {

-1

src/libc/gen/wordexp.c

··· 38 38 #include <crt_externs.h> 39 39 40 40 extern size_t malloc_good_size(size_t size); 41 - __asm__(".symver errno, errno@DARWIN"); 42 41 extern int errno; 43 42 44 43 pthread_once_t re_init_c = PTHREAD_ONCE_INIT;

-8

src/libc/pthreads/pthread_tsd.c

··· 253 253 254 254 } 255 255 256 - #ifdef DARLING 257 - 258 - // Needed for Darling libdyld 259 - 260 - int __darwin_pthread_key_create(pthread_key_t *key, void (*destructor)(void*)) __attribute__((weak, alias("pthread_key_create"))); 261 - int __darwin_pthread_key_delete(pthread_key_t key) __attribute__((weak, alias("pthread_key_delete"))); 262 - int __darwin_pthread_setspecific(pthread_key_t key, const void *value) __attribute__((weak, alias("pthread_setspecific"))); 263 - #endif 264 256

-1

src/libc/threads/cprocs.c

··· 135 135 */ 136 136 137 137 #undef errno 138 - __asm__(".symver errno, errno@DARWIN"); 139 138 extern int errno; 140 139 extern int *__error(void); 141 140 extern int __pthread_canceled(int);

+20 -20

src/libc/x86_64/CMakeLists.txt

··· 3 3 cmake_minimum_required(VERSION 2.8.0) 4 4 enable_language(C ASM) 5 5 6 - add_definitions(-D_getcontext=getcontext) 7 - add_definitions(-D_moncount=moncount) 8 - add_definitions(-D__setcontext=_setcontext) 9 - add_definitions(-D_getmcontext=getmcontext) 10 - add_definitions(-D__ctx_done=_ctx_done) 11 - add_definitions(-D__ctx_start=_ctx_start) 12 - add_definitions(-D_cpu_number=cpu_number) 13 - add_definitions(-D_sys_icache_invalidate=sys_icache_invalidate) 14 - add_definitions(-D_sys_dcache_flush=sys_dcache_flush) 6 + #add_definitions(-D_getcontext=getcontext) 7 + #add_definitions(-D_moncount=moncount) 8 + #add_definitions(-D__setcontext=_setcontext) 9 + #add_definitions(-D_getmcontext=getmcontext) 10 + #add_definitions(-D__ctx_done=_ctx_done) 11 + #add_definitions(-D__ctx_start=_ctx_start) 12 + #add_definitions(-D_cpu_number=cpu_number) 13 + #add_definitions(-D_sys_icache_invalidate=sys_icache_invalidate) 14 + #add_definitions(-D_sys_dcache_flush=sys_dcache_flush) 15 15 add_definitions(-DPRIVATE) 16 16 17 17 #set(CMAKE_C_FLAGS "-I${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/../../duct/include/commpage.h -include ${DARLING_TOP_DIRECTORY}/platform-include/sys/time.h") ··· 31 31 ../i386/pthreads/init_cpu_capabilities.c 32 32 ) 33 33 34 - add_definitions(-D_preempt=preempt) 35 - add_definitions(-D_backoff=backoff) 36 - add_definitions(-D_pthread_getspecific=pthread_getspecific) 37 - add_definitions(-D__commpage_pthread_mutex_lock=_commpage_pthread_mutex_lock) 38 - add_definitions(-D_pthread_self=pthread_self) 39 - add_definitions(-D___pthread_set_self=__pthread_set_self) 40 - add_definitions(-D_start_wqthread=start_wqthread) 41 - add_definitions(-D_thread_start=thread_start) 42 - add_definitions(-D__pthread_start=_pthread_start) 43 - add_definitions(-D__pthread_wqthread=_pthread_wqthread) 34 + #add_definitions(-D_preempt=preempt) 35 + #add_definitions(-D_backoff=backoff) 36 + #add_definitions(-D_pthread_getspecific=pthread_getspecific) 37 + #add_definitions(-D__commpage_pthread_mutex_lock=_commpage_pthread_mutex_lock) 38 + #add_definitions(-D_pthread_self=pthread_self) 39 + #add_definitions(-D___pthread_set_self=__pthread_set_self) 40 + #add_definitions(-D_start_wqthread=start_wqthread) 41 + #add_definitions(-D_thread_start=thread_start) 42 + #add_definitions(-D__pthread_start=_pthread_start) 43 + #add_definitions(-D__pthread_wqthread=_pthread_wqthread) 44 44 45 45 set(x86_64_sources ${x86_64_sources} 46 46 pthreads/preempt.S ··· 85 85 sys/setjmp.S 86 86 sys/_sigtramp.S 87 87 sys/spinlocks_asm.S 88 - #sys/spinlocks.c # aliases in other complation units don't work, work done in spinlocks_asm.S 88 + sys/spinlocks.c # aliases in other complation units don't work, work done in spinlocks_asm.S 89 89 ) 90 90 91 91

-5

src/libc/x86_64/gen/_ctx_start.S

··· 50 50 51 51 #if defined(__x86_64__) 52 52 53 - #ifdef DARLING 54 - # define __ctx_start _ctx_start 55 - # define __ctx_done _ctx_done 56 - #endif 57 - 58 53 #include <architecture/i386/asm_help.h> 59 54 60 55 /*

-4

src/libc/x86_64/gen/_setcontext.S

··· 24 24 25 25 #if defined(__x86_64__) 26 26 27 - #ifdef DARLING 28 - # define __setcontext _setcontext 29 - #endif 30 - 31 27 #include <architecture/i386/asm_help.h> 32 28 33 29 #define MCONTEXT_SS_RAX 16

-4

src/libc/x86_64/gen/cpu_number.S

··· 37 37 * osfmk/i386/mp_desc.c. 38 38 */ 39 39 40 - #ifdef DARLING 41 - # define _cpu_number cpu_number 42 - #endif 43 - 44 40 /* return logical cpu number in %rax */ 45 41 46 42 .globl _cpu_number

-4

src/libc/x86_64/gen/getcontext.S

··· 24 24 25 25 #if defined(__x86_64__) 26 26 27 - #ifdef DARLING 28 - # define _getcontext getcontext 29 - #endif 30 - 31 27 #include <architecture/i386/asm_help.h> 32 28 33 29 #define MCONTEXT_SS_RAX 16

+1 -6

src/libc/x86_64/gen/icacheinval.S

··· 26 26 27 27 #include <machine/cpu_capabilities.h> 28 28 29 - #ifdef DARLING 30 - # define _sys_icache_invalidate sys_icache_invalidate 31 - # define _sys_dcache_flush sys_dcache_flush 32 - #endif 33 - 34 29 .text 35 - .align (2<<4), 0x00 30 + .align 4, 0x00 36 31 37 32 /* void sys_icache_invalidate(addr_t start, int length) */ 38 33

+3 -5

src/libc/x86_64/gen/mcount.S

··· 20 20 * 21 21 * @APPLE_LICENSE_HEADER_END@ 22 22 */ 23 - #include <architecture/i386/asm_help.h> 23 + #import <architecture/i386/asm_help.h> 24 24 25 25 .text 26 - .globl mcount 27 - mcount: 28 - #ifndef DARLING /* DARLING-TODO, moncount is defunct/deprecated API */ 26 + .globl _mcount 27 + _mcount: 29 28 pushq %rbp // setup mcount's frame 30 29 movq %rsp, %rbp 31 30 subq $64, %rsp // allocate space for storage and alignment ··· 50 49 51 50 movq %rbp, %rsp 52 51 popq %rbp // tear down frame 53 - #endif 54 52 ret

-4

src/libc/x86_64/pthreads/pthread_getspecific.S

··· 24 24 25 25 #include "pthread_machdep.h" 26 26 27 - #ifdef DARLING 28 - # define _pthread_getspecific pthread_getspecific 29 - #endif 30 - 31 27 .text 32 28 .align (2<<2), 0x90 33 29 .globl _pthread_getspecific

-4

src/libc/x86_64/pthreads/pthread_mutex_lock.S

··· 24 24 #include <machine/cpu_capabilities.h> 25 25 #include <architecture/i386/asm_help.h> 26 26 27 - #ifdef DARLING 28 - # define __commpage_pthread_mutex_lock _commpage_pthread_mutex_lock 29 - #endif 30 - 31 27 #define PTHRW_LVAL 0 32 28 #define PTHRW_UVAL 4 33 29

-4

src/libc/x86_64/pthreads/pthread_self.S

··· 24 24 25 25 #include "pthread_machdep.h" 26 26 27 - #ifdef DARLING 28 - # define _pthread_self pthread_self 29 - #endif 30 - 31 27 .text 32 28 .align (2<<2), 0x90 33 29 .globl _pthread_self

-4

src/libc/x86_64/pthreads/pthread_set_self.S

··· 23 23 24 24 #include <mach/i386/syscall_sw.h> 25 25 26 - #ifdef DARLING 27 - # define ___pthread_set_self __pthread_set_self 28 - #endif 29 - 30 26 .text 31 27 .align (2<<2), 0x90 32 28 .globl ___pthread_set_self

-5

src/libc/x86_64/pthreads/start_wqthread.S

··· 25 25 #include "pthread_machdep.h" 26 26 #include <architecture/i386/asm_help.h> 27 27 28 - #ifdef DARLING 29 - # define _start_wqthread start_wqthread 30 - # define __pthread_wqthread _pthread_wqthread 31 - #endif 32 - 33 28 .text 34 29 .align (2<<2), 0x90 35 30 .globl _start_wqthread

-5

src/libc/x86_64/pthreads/thread_start.S

··· 25 25 #include "pthread_machdep.h" 26 26 #include <architecture/i386/asm_help.h> 27 27 28 - #ifdef DARLING 29 - # define _thread_start thread_start 30 - # define __pthread_start _pthread_start 31 - #endif 32 - 33 28 .text 34 29 .align (2<<2), 0x90 35 30 .globl _thread_start

+3 -3

src/libc/x86_64/string/__bzero.S

··· 1 - .globl __bzero 2 - __bzero: 3 - jmp bzero@PLT 1 + .globl ___bzero 2 + ___bzero: 3 + jmp _bzero

+3 -15

src/libc/x86_64/string/bcopy.c

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 3 * ··· 23 22 */ 24 23 25 24 #include <machine/cpu_capabilities.h> 26 - #include <sys/types.h> 27 25 #include "platfunc.h" 28 26 29 27 PLATFUNC_DESCRIPTOR_PROTOTYPE(bcopy, sse42) ··· 35 33 0 36 34 }; 37 35 38 - void bcopy_chooser(const void *src, void *dest, size_t n) 39 - #ifndef DARLING 40 - __asm__("_bcopy"); 41 - #else 42 - __asm__("bcopy"); 43 - #endif 44 - void bcopy_chooser(const void *src, void *dest, size_t n) { 45 - #ifndef DARLING 36 + void *bcopy_chooser() __asm__("_bcopy"); 37 + void *bcopy_chooser() { 46 38 __asm__(".desc _bcopy, 0x100"); 47 - #endif 48 - void (*impl)(const void *src, void *dest, size_t n); 49 - impl = find_platform_function((const platfunc_descriptor **) bcopy_platfunc_descriptors); 50 - 51 - impl(src, dest, n); 39 + return find_platform_function((const platfunc_descriptor **) bcopy_platfunc_descriptors); 52 40 }

-5

src/libc/x86_64/string/bcopy_sse3x.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. 4 3 * ··· 36 35 * 37 36 * The following #defines are tightly coupled to the u-architecture: 38 37 */ 39 - 40 - #ifdef DARLING 41 - # define _longcopy longcopy@PLT 42 - #endif 43 38 44 39 #define kShort 80 // too short to bother with SSE (must be >=80) 45 40 #define kVeryLong (500*1024) // large enough for non-temporal stores (>=8192 and <2GB)

+4 -9

src/libc/x86_64/string/bzero.c

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 3 * ··· 23 22 */ 24 23 25 24 #include <machine/cpu_capabilities.h> 26 - #include <sys/types.h> 27 25 #include "platfunc.h" 28 26 29 27 PLATFUNC_DESCRIPTOR_PROTOTYPE(bzero, sse42) ··· 35 33 0 36 34 }; 37 35 38 - void bzero_chooser(void *s, size_t n) __asm__("bzero"); 39 - void bzero_chooser(void *s, size_t n) { 40 - //__asm__(".desc _bzero, 0x100"); 41 - void (*impl)(void*, size_t); 42 - impl = find_platform_function((const platfunc_descriptor **) bzero_platfunc_descriptors); 43 - 44 - impl(s, n); 36 + void *bzero_chooser() __asm__("_bzero"); 37 + void *bzero_chooser() { 38 + __asm__(".desc _bzero, 0x100"); 39 + return find_platform_function((const platfunc_descriptor **) bzero_platfunc_descriptors); 45 40 }

+4 -12

src/libc/x86_64/string/ffs.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. 4 3 * ··· 21 20 * 22 21 * @APPLE_LICENSE_HEADER_END@ 23 22 */ 24 - 25 - #ifdef DARLING 26 - # define _ffs ffs 27 - # define _ffsl ffsl 28 - # define _fls fls 29 - # define _flsl flsl 30 - #endif 31 23 32 24 .text 33 25 34 - .align (2<<2) 26 + .align 2 35 27 .globl _ffs 36 28 _ffs: 37 29 movl $(-1), %edx ··· 41 33 ret 42 34 43 35 44 - .align (2<<2) 36 + .align 2 45 37 .globl _ffsl 46 38 _ffsl: 47 39 movl $(-1), %edx ··· 51 43 ret 52 44 53 45 54 - .align (2<<2) 46 + .align 2 55 47 .globl _fls 56 48 _fls: 57 49 movl $(-1), %edx ··· 61 53 ret 62 54 63 55 64 - .align (2<<2) 56 + .align 2 65 57 .globl _flsl 66 58 _flsl: 67 59 movl $(-1), %edx

+2 -9

src/libc/x86_64/string/longcopy_sse3x.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. 4 3 * ··· 56 55 57 56 // void longcopy(const void *dest, void *sou, size_t len) 58 57 59 - #ifdef DARLING 60 - # define _longcopy longcopy 61 - # define _memcpy memcpy@PLT 62 - .globl longcopy 63 - #endif 64 - 65 - // .private_extern _longcopy 58 + .private_extern _longcopy 66 59 _longcopy: 67 60 pushq %rbp // set up a frame for backtraces 68 61 movq %rsp,%rbp ··· 103 96 // ecx = scratch reg used to read a byte of each cache line 104 97 // eax = chunk offset 105 98 106 - .align (2<<4),0x90 // 16-byte align inner loops 99 + .align 4,0x90 // 16-byte align inner loops 107 100 LTouchLoop: 108 101 movzb (%rsi,%rax),%ecx // touch line 0, 2, 4, or 6 of page 109 102 movzb 1*64(%rsi,%rax),%ecx // touch line 1, 3, 5, or 7

+1 -9

src/libc/x86_64/string/memcmp.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 4 3 * ··· 41 40 42 41 #define kShort 18 // too short for vectors (must be >16) 43 42 44 - #ifdef DARLING 45 - # define _memcmp memcmp 46 - # define _bcmp bcmp 47 - .type memcmp, @function 48 - .type bcmp, @function 49 - #endif 50 - 51 43 .text 52 - .align (2<<4) 44 + .align 4 53 45 54 46 .globl _memcmp 55 47 .globl _bcmp

+4 -9

src/libc/x86_64/string/memcpy.c

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 3 * ··· 23 22 */ 24 23 25 24 #include <machine/cpu_capabilities.h> 26 - #include <sys/types.h> 27 25 #include "platfunc.h" 28 26 29 27 PLATFUNC_DESCRIPTOR_PROTOTYPE(memcpy, sse42) ··· 35 33 0 36 34 }; 37 35 38 - void *memcpy_chooser(void *dest, const void *src, size_t n) __asm__("memcpy"); 39 - void *memcpy_chooser(void *dest, const void *src, size_t n) { 40 - // __asm__(".desc _memcpy, 0x100"); 41 - void *(*impl)(void *, const void *, size_t); 42 - impl = find_platform_function((const platfunc_descriptor **) memcpy_platfunc_descriptors); 43 - 44 - return impl(dest, src, n); 36 + void *memcpy_chooser() __asm__("_memcpy"); 37 + void *memcpy_chooser() { 38 + __asm__(".desc _memcpy, 0x100"); 39 + return find_platform_function((const platfunc_descriptor **) memcpy_platfunc_descriptors); 45 40 }

+4 -9

src/libc/x86_64/string/memmove.c

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 3 * ··· 23 22 */ 24 23 25 24 #include <machine/cpu_capabilities.h> 26 - #include <sys/types.h> 27 25 #include "platfunc.h" 28 26 29 27 PLATFUNC_DESCRIPTOR_PROTOTYPE(memmove, sse42) ··· 35 33 0 36 34 }; 37 35 38 - void *memmove_chooser(void *dest, const void *src, size_t n) __asm__("memmove"); 39 - void *memmove_chooser(void *dest, const void *src, size_t n) { 40 - //__asm__(".desc _memmove, 0x100"); 41 - void *(*impl)(void *, const void *, size_t); 42 - impl = find_platform_function((const platfunc_descriptor **) memmove_platfunc_descriptors); 43 - 44 - return impl(dest, src, n); 36 + void *memmove_chooser() __asm__("_memmove"); 37 + void *memmove_chooser() { 38 + __asm__(".desc _memmove, 0x100"); 39 + return find_platform_function((const platfunc_descriptor **) memmove_platfunc_descriptors); 45 40 }

-10

src/libc/x86_64/string/memset.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 4 3 * ··· 42 41 * NB: we avoid "stos" family of instructions (stosl, stosb), as they are very slow 43 42 * on P4s and probably other processors. 44 43 */ 45 - 46 - #ifdef DARLING 47 - # define _memset memset 48 - # define _memset_pattern4 memset_pattern4 49 - # define _memset_pattern8 memset_pattern8 50 - # define _memset_pattern16 memset_pattern16 51 - # define _bzero bzero@PLT 52 - .type memset, @function 53 - #endif 54 44 55 45 #define kShort 255 // for nonzero memset(), too short for commpage 56 46

+1 -7

src/libc/x86_64/string/strcmp.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 4 3 * ··· 35 34 // reading bytes past the difference. To avoid this, we never do a load 36 35 // that crosses a page boundary. 37 36 38 - #ifdef DARLING 39 - # define _strcmp strcmp 40 - .type strcmp, @function 41 - #endif 42 - 43 37 .text 44 38 .globl _strcmp 45 39 46 - .align (2<<4) 40 + .align 4 47 41 _strcmp: // int strcmp(const char *s1,const char *s2); 48 42 49 43 // In order to avoid spurious page faults, we loop over:

+1 -7

src/libc/x86_64/string/strcpy.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 4 3 * ··· 37 36 // 38 37 // We align the destination, because unaligned vector stores are slow. 39 38 40 - #ifdef DARLING 41 - # define _strcpy strcpy 42 - .type strcpy, @function 43 - #endif 44 - 45 39 .text 46 40 .globl _strcpy 47 41 48 - .align (2<<4) 42 + .align 4 49 43 _strcpy: // char *strcpy(const char *dst, const char *src); 50 44 movq %rdi,%rcx // preserve dest ptr so we can return it 51 45 movl %edi,%edx // copy low 4 bytes of dest ptr

+1 -7

src/libc/x86_64/string/strlcat.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2007 Apple Inc. All rights reserved. 4 3 * ··· 47 46 // On Core2 class machines, this algorithm seems to be faster than the 48 47 // naive byte-by-byte version for operands longer than about 11 bytes. 49 48 50 - #ifdef DARLING 51 - # define _strlcat strlcat 52 - .type strlcat, @function 53 - #endif 54 - 55 49 .text 56 50 .globl _strlcat 57 51 ··· 64 58 // %rsi = source ptr 65 59 // %rdx = size 66 60 67 - .align (2<<4) 61 + .align 4 68 62 _strlcat: // size_t *strlcat(char *dst, const char *src, size_t size); 69 63 movl %edi,%ecx // copy buffer ptr 70 64 movq %rdi,%r10 // save copies of buffer ptr and length

+1 -7

src/libc/x86_64/string/strlcpy.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2007 Apple Inc. All rights reserved. 4 3 * ··· 44 43 // with one exception: 0x01 bytes preceeding the first zero are also 45 44 // mapped to 0x80. 46 45 47 - #ifdef DARLING 48 - # define _strlcpy strlcpy 49 - .type strlcpy,@function 50 - #endif 51 - 52 46 .text 53 47 .globl _strlcpy 54 48 ··· 57 51 // %rsi = source ptr 58 52 // %rdx = length 59 53 60 - .align (2<<4) 54 + .align 4 61 55 _strlcpy: // size_t *strlcpy(char *dst, const char *src, size_t size); 62 56 movl %esi,%ecx // copy source ptr 63 57 movq %rdi,%r10 // copy dest ptr

+1 -7

src/libc/x86_64/string/strlen.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005-2007 Apple Inc. All rights reserved. 4 3 * ··· 32 31 * We favor the fall-through (ie, short operand) path. 33 32 */ 34 33 35 - #ifdef DARLING 36 - # define _strlen strlen 37 - .type strlen, @function 38 - #endif 39 - 40 34 .text 41 35 .globl _strlen 42 - .align (2<<4), 0x90 36 + .align 4, 0x90 43 37 _strlen: // size_t strlen(char *b); 44 38 pxor %xmm0,%xmm0 // zero %xmm0 45 39 movl %edi,%ecx // copy low half of ptr

+1 -7

src/libc/x86_64/string/strncmp.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved. 4 3 * ··· 35 34 // reading bytes past the difference. To avoid this, we never do a load 36 35 // that crosses a page boundary. 37 36 38 - #ifdef DARLING 39 - # define _strncmp strncmp 40 - .type strncmp, @function 41 - #endif 42 - 43 37 #define kShort 20 // too short for vectors (must be >16) 44 38 45 39 .text 46 40 .globl _strncmp 47 41 48 - .align (2<<4) 42 + .align 4 49 43 _strncmp: // int strncmp(const char *s1, const char *s2, size_t len); 50 44 cmpq $(kShort),%rdx // worth accelerating? 51 45 ja LNotShort // yes

-7

src/libc/x86_64/string/strncpy.S

··· 1 - // Modified by Lubos Dolezel for Darling 2 1 /* 3 2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 4 3 * ··· 42 41 // Recall that strncpy() zero fills the remainder of the dest buffer, 43 42 // and does not terminate the string if its length is greater than or 44 43 // equal to n. 45 - 46 - #ifdef DARLING 47 - # define _strncpy strncpy 48 - # define _bzero bzero@PLT 49 - .type strncpy, @function 50 - #endif 51 44 52 45 #define kShort 31 // too short to bother with vector loop 53 46

+2 -2

src/libc/x86_64/sys/OSAtomic.S

··· 28 28 #include <architecture/i386/asm_help.h> 29 29 30 30 #define DECLARE(x) \ 31 - .align 1<<2 ; \ 31 + .align 2 ; \ 32 32 .globl x ; \ 33 33 .globl x ## Barrier ; \ 34 34 x: ; \ ··· 41 41 #define ATOMIC_RET_ORIG 0 42 42 #define ATOMIC_RET_NEW 1 43 43 44 - #ifndef DARLING 44 + #ifndef DARLING__DISABLED 45 45 // compare and exchange 32-bit 46 46 // xchg32 <new> <dst> <mp> 47 47 .macro xchg32

-5

src/libc/x86_64/sys/_setjmp.S

··· 55 55 #define JB_FPCONTROL 76 56 56 #define JB_MASK 80 57 57 58 - #ifdef DARLING 59 - # define __setjmp _setjmp 60 - # define __longjmp _longjmp 61 - #endif 62 - 63 58 LEAF(__setjmp, 0) 64 59 // %rdi is a jmp_buf (struct sigcontext *) 65 60

+2 -18

src/libc/x86_64/sys/_sigtramp.S

··· 42 42 #define MCONTEXT_SS_R8 80 43 43 #define MCONTEXT_SS_RIP 144 44 44 45 - #ifdef DARLING 46 - # define __sigtramp _sigtramp 47 - # define ___sigreturn __sigreturn@PLT 48 - # define ___in_sigtramp __in_sigtramp@GOTPCREL 49 - #endif 50 - 51 45 /* register use: 52 46 %rbx uctx 53 47 ··· 60 54 ucontext_t *uctx %r8 61 55 ) 62 56 */ 63 - 64 - #ifdef DARLING 65 - # define __sigtramp _sigtramp 66 - #endif 67 57 68 58 .globl __sigtramp 69 59 .text 70 - .align 1<<4 60 + .align 4 71 61 __sigtramp: 72 62 Lstart: 73 63 /* Although this routine does not need any stack frame, various parts ··· 162 152 DW_OP_breg(3), UCONTEXT_UC_MCONTEXT, DW_OP_deref, \ 163 153 DW_OP_plus_uconst, MCONTEXT_SS_R8+(8*(regno-8)), 1 164 154 165 - #ifndef DARLING 166 155 /* Unwind tables. */ 167 156 .section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support 168 - #else 169 - .text 170 - #endif 171 157 172 158 EH_frame1: 173 159 .set L$set$0,LECIE1-LSCIE1 ··· 264 250 .byte DW_OP_plus_uconst, MCONTEXT_SS_RSP, DW_OP_deref 265 251 Lcfa_end: 266 252 267 - .align 1<<3 253 + .align 3 268 254 LEFDE1: 269 255 270 - #ifndef DARLING 271 256 .subsections_via_symbols 272 - #endif 273 257

+1 -1

src/libc/x86_64/sys/atomic.c

··· 25 25 #include <machine/cpu_capabilities.h> 26 26 #include "platfunc.h" 27 27 28 - #ifndef DARLING 28 + #ifndef DARLING__DISABLED 29 29 #define RESOLVER_UP_MP(symbol) \ 30 30 PLATFUNC_DESCRIPTOR(symbol, up, kUP, 0); \ 31 31 PLATFUNC_DESCRIPTOR(symbol, mp, 0, kUP); \

+1 -9

src/libc/x86_64/sys/i386_gettimeofday_asm.S

··· 31 31 #include <machine/cpu_capabilities.h> 32 32 #include "platfunc.h" 33 33 34 - #ifdef DARLING 35 - # define _mach_absolute_time mach_absolute_time@PLT 36 - #endif 37 - 38 34 #define NSEC_PER_SEC 1000*1000*1000 39 35 #define NSEC_PER_USEC 1000 40 36 41 - #ifndef DARLING 42 37 .private_extern ___commpage_gettimeofday 43 - #else 44 - # define ___commpage_gettimeofday __commpage_gettimeofday 45 - #endif 46 - .align 1<<4 38 + .align 4 47 39 ___commpage_gettimeofday: 48 40 // %rdi = ptr to timeval 49 41 pushq %rbp // set up a frame for backtraces

-4

src/libc/x86_64/sys/nanotime.S

··· 29 29 #include <sys/appleapiopts.h> 30 30 #include <machine/cpu_capabilities.h> 31 31 32 - #ifdef DARLING 33 - # define _mach_absolute_time mach_absolute_time 34 - #endif 35 - 36 32 /* 37 33 * 64-bit version _mach_absolute_time. We return the 64-bit nanotime in %rax, 38 34 */

+1 -1

src/libc/x86_64/sys/setjmp.S

··· 43 43 * 44 44 */ 45 45 46 - #ifdef DARLING 46 + #ifdef DARLING__DISABLED 47 47 # define x__setjmp _setjmp 48 48 # define x_setjmp setjmp 49 49 # define x_longjmp longjmp

+1 -1

src/libc/x86_64/sys/spinlocks.c

··· 25 25 #include <machine/cpu_capabilities.h> 26 26 #include "platfunc.h" 27 27 28 - #ifndef DARLING 28 + #ifndef DARLING__DISABLED 29 29 #define RESOLVER_UP_MP(symbol) \ 30 30 PLATFUNC_DESCRIPTOR_PROTOTYPE(symbol, up); \ 31 31 PLATFUNC_DESCRIPTOR_PROTOTYPE(symbol, mp); \

-15

src/libc/x86_64/sys/spinlocks_asm.S

··· 43 43 PLATFUNC_DESCRIPTOR(OSSpinLockTry,up,kUP,0) 44 44 PLATFUNC_DESCRIPTOR(_spin_lock_try,up,kUP,0) 45 45 46 - #ifdef DARLING 47 - .globl OSSpinLockTry 48 - .globl _spin_lock_try 49 - OSSpinLockTry: 50 - _spin_lock_try: 51 - #endif 52 46 PLATFUNC_FUNCTION_START_GENERIC(OSSpinLockTry, mp, 64, 4) 53 47 PLATFUNC_FUNCTION_START_GENERIC(_spin_lock_try, mp, 64, 4) 54 48 xorl %eax, %eax ··· 90 84 PLATFUNC_DESCRIPTOR(spin_lock,up,kUP,0) 91 85 92 86 93 - #ifdef DARLING 94 - .globl OSSpinLockLock 95 - .globl _spin_lock 96 - .globl spin_lock 97 - 98 - OSSpinLockLock: 99 - _spin_lock: 100 - spin_lock: 101 - #endif 102 87 PLATFUNC_FUNCTION_START_GENERIC(OSSpinLockLock, mp, 64, 4) 103 88 PLATFUNC_FUNCTION_START_GENERIC(_spin_lock, mp, 64, 4) 104 89 PLATFUNC_FUNCTION_START_GENERIC(spin_lock, mp, 64, 4)

+7 -6

src/libm/CMakeLists.txt

··· 5 5 6 6 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse -msse2 -msse3 -w -ggdb -O0") 7 7 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__DARWIN_UNIX03 -fPIC -w -DBUILDING_FOR_CARBONCORE_LEGACY -funroll-loops -msse3") # -DBUILDING_FOR_CARBONCORE_LEGACY") 8 - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -nostdlib -Wl,--version-script=${DARLING_TOP_DIRECTORY}/darwin.map") 8 + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -nostdlib -Wl,-flat_namespace -Wl,-undefined,suppress -Wl,-alias_list,${CMAKE_CURRENT_SOURCE_DIR}/Exports/libm_Intel.a.alias") 9 + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-alias_list,${CMAKE_CURRENT_SOURCE_DIR}/Exports/libmathCommonIntel.alias") 9 10 10 11 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/Source/Intel) 11 12 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../kernel-include) 12 - add_definitions(-Dliteral8=text -Dliteral4=text -Dliteral16=text -Dmovsxw=movswl -DPRIVATE) 13 + add_definitions(-Dmovsxw=movswl -DPRIVATE) 13 14 14 15 set(libm_sources 15 16 Source/abs.c ··· 262 263 #add_library(system_m_extra_no_fenv OBJECT Source/Intel/xmm_log.c) 263 264 #set_target_properties(system_m_extra_no_fenv PROPERTIES COMPILE_FLAGS "-UBUILDING_FOR_CARBONCORE_LEGACY -ftrapping-math -fPIC -DNO_FENV") 264 265 265 - add_library(system_m_no_fenv STATIC ${libm_nofenv_sources}) # $<TARGET_OBJECTS:system_m_extra_no_fenv>) 266 - set_target_properties(system_m_no_fenv PROPERTIES COMPILE_FLAGS "-UBUILDING_FOR_CARBONCORE_LEGACY -ftrapping-math -fPIC -DNO_FENV") 266 + #add_library(system_m_no_fenv OBJECT ${libm_nofenv_sources}) # $<TARGET_OBJECTS:system_m_extra_no_fenv>) 267 + #set_target_properties(system_m_no_fenv PROPERTIES COMPILE_FLAGS "-UBUILDING_FOR_CARBONCORE_LEGACY -ftrapping-math -fPIC -DNO_FENV") 267 268 268 - add_library(system_m SHARED ${libm_sources} $<TARGET_OBJECTS:system_m_extra>) 269 - target_link_libraries(system_m PRIVATE "-Wl,--whole-archive" system_m_no_fenv) 269 + add_darling_library(system_m SHARED ${libm_sources} $<TARGET_OBJECTS:system_m_extra>) 270 + #target_link_libraries(system_m) 270 271 271 272 install(TARGETS system_m DESTINATION ${CMAKE_INSTALL_LIBDIR}/darling) 272 273

+5 -6

src/libm/Source/Intel/acosf.S

··· 28 28 S1: .double -3.333333463718495862077843807 29 29 30 30 31 - ////.const 32 - .align (1<<4) 31 + .const 32 + .align 4 33 33 34 34 /* Define some coefficients for center polynomial (used for x in [-.62, 35 35 +.62]). These are stored in pairs at aligned addresses for use in SIMD ··· 158 158 showing that a valid result will be obtained regardless of which 159 159 value rsqrtss provides. 160 160 */ 161 - #define _acosf acosf 162 - .align (1<<5) 161 + .align 5 163 162 #if !defined DevelopmentInstrumentation 164 163 // This is the regular name used in the deployed implementation. 165 - .globl acosf 166 - acosf: 164 + .globl _acosf 165 + _acosf: 167 166 #else 168 167 // This is the name used for a special test version of the routine. 169 168 .globl _acosfInstrumented

+4 -4

src/libm/Source/Intel/acoshf.S

··· 11 11 #include <machine/asm.h> 12 12 #include "abi.h" 13 13 14 - ////.const 14 + .const 15 15 16 - .align (1<<4) 16 + .align 4 17 17 .quad 0x3e607933f5c9e7de, 0xbfb348d7f4d2c2c1 // c0, d 18 18 .quad 0xbec93b87a2f58777, 0x0000000000000000 // c1, 0 19 19 .quad 0x3ffb9a832f4eb8b5, 0x3ffec80a0deff17f // a0, b0 ··· 286 286 287 287 288 288 .literal8 289 - .align (1<<3) 289 + .align 3 290 290 one: .quad 0x3ff0000000000000 291 291 onehalf: .quad 0x3fe0000000000000 292 292 onethird: .quad 0x3fd5555555555555 ··· 297 297 298 298 299 299 .text 300 - .align (1<<4) 300 + .align 4 301 301 #if defined( __x86_64__ ) 302 302 #define RELATIVE_ADDR( _a ) (_a)( %rip ) 303 303 #else

+5 -5

src/libm/Source/Intel/asinf.S

··· 27 27 S1: .double -3.333333463718495862077843807 28 28 29 29 30 - ////.const 31 - .align (1<<4) 30 + .const 31 + .align 4 32 32 33 33 /* Define some coefficients for center polynomial (used for x in [-.57, 34 34 +.57]). These are stored in pairs at aligned addresses for use in SIMD ··· 162 162 showing that a valid result will be obtained regardless of which 163 163 value rsqrtss provides. 164 164 */ 165 - .align (1<<5) 165 + .align 5 166 166 #if !defined DevelopmentInstrumentation 167 167 // This is the regular name used in the deployed implementation. 168 - .globl asinf 169 - asinf: 168 + .globl _asinf 169 + _asinf: 170 170 #else 171 171 // This is the name used for a special test version of the routine. 172 172 .globl _asinfInstrumented

+6 -6

src/libm/Source/Intel/asinhf.S

··· 52 52 #include <machine/asm.h> 53 53 #include "abi.h" 54 54 55 - ////.const 55 + .const 56 56 57 57 // Coefficients for 7th order polynomial approximation on [0, 1/4] 58 58 // The polynomail is computed in packed factored form as follows: ··· 61 61 // lo double: (cx * (x + alo)) * (x(x + b1lo) + b0lo) 62 62 // 63 63 // The high and low parts are then unpacked and multiplied. 64 - .align (1<<4) 64 + .align 4 65 65 asinhf_low: .quad 0x4008183efcaf7119, 0x4007eba0a6c21cf1 // b0hi, b0lo 66 66 .quad 0x3ffe9547f4507ace, 0xbffd2c173c2ad586 // ahi, alo 67 67 .quad 0x40021aa6afb79159, 0xc0015e2ed556dde5 // b1hi, b1lo ··· 69 69 70 70 // Coefficients for rational approximation on [1/4, 4] 71 71 // p(x) and q(x) are computed side-by-side in packed arithmetic, then unpacked and divided. 72 - .align (1<<4) 72 + .align 4 73 73 asinhf_mid: .quad 0x3e4328ccef61bd30, 0x3f80f6f9cf323b3c // p[0], q[0] 74 74 .quad 0x3f80f6e561f06785, 0x3f85b29a3e277523 // p[1], q[1] 75 75 .quad 0x3f85b32a2f11b40a, 0x3f8e26416e925090 // p[2], q[2] ··· 79 79 .quad 0x3f26c17c7b263d18, 0x3f001e11059bddca // p[6], q[6] 80 80 81 81 82 - .align (1<<4) 82 + .align 4 83 83 // Polynomial coefficients for the correction to log(2x) for the "large" case. 84 84 // The polynomial is computed in factored form as follows: 85 85 // ··· 348 348 .quad 0x3fe61e3efda46467, 0x3fe0080402010080 // log(1.99609), 1/1.99609 349 349 350 350 .literal8 351 - .align (1<<3) 351 + .align 3 352 352 one: .quad 0x3ff0000000000000 353 353 onehalf: .quad 0x3fe0000000000000 354 354 onethird: .quad 0x3fd5555555555555 ··· 360 360 #else 361 361 #define RELATIVE_ADDR( _a ) (_a)-asinhf_body( %ecx ) 362 362 363 - .align (1<<4) 363 + .align 4 364 364 asinhf_pic: 365 365 movl (%esp), %ecx // Copy address of this instruction to %ecx 366 366 ret

+6 -6

src/libm/Source/Intel/atan2f.S

··· 29 29 C2: .double 0.0029352921857004596570518 30 30 31 31 32 - //.const 33 - .align (1<<4) 32 + .const 33 + .align 4 34 34 35 35 /* Define some coefficients for center polynomial (used for x in [-1, +1]). 36 36 These are stored in pairs at aligned addresses for use in SIMD ··· 42 42 C10: .double 5.4728447324456990092824269, 6.7197076223592378022736307 43 43 44 44 // This needs to be 16-byte aligned because it is used in an orpd instruction. 45 - .align (1<<4) 45 + .align 4 46 46 pPi: .double +3.141592653589793238462643 // pi. 47 47 48 48 ··· 209 209 (x1, y1) are in the same quadrant, then y0/x0 <= y1/x1 implies 210 210 atan2f(y0, x0) <= atan2f(y1, x1).) 211 211 */ 212 - .align (1<<5) 213 - .globl atan2f 214 - atan2f: 212 + .align 5 213 + .globl _atan2f 214 + _atan2f: 215 215 216 216 cvtss2sd Argy, y // Convert to double precision. 217 217 cvtss2sd Argx, x

+5 -5

src/libm/Source/Intel/atanf.S

··· 21 21 C2: .double 0.0029352921857004596570518 22 22 23 23 24 - //.const 25 - .align (1<<4) 24 + .const 25 + .align 4 26 26 27 27 /* Define some coefficients for center polynomial (used for x in [-1, +1]). 28 28 These are stored in pairs at aligned addresses for use in SIMD ··· 155 155 Exhaustive testing proved this routine returns faithfully rounded 156 156 results. 157 157 */ 158 - .align (1<<5) 158 + .align 5 159 159 #if !defined DevelopmentInstrumentation 160 160 // This is the regular name used in the deployed implementation. 161 - .globl atanf 162 - atanf: 161 + .globl _atanf 162 + _atanf: 163 163 #else 164 164 // This is the name used for a special test version of the routine. 165 165 .globl _atanfInstrumented

+5 -5

src/libm/Source/Intel/atanhf.S

··· 12 12 #include <machine/asm.h> 13 13 #include "abi.h" 14 14 15 - //.const 16 - .align (1<<4) 15 + .const 16 + .align 4 17 17 // Polynomial coefficients used for the 7/8 < |x| < 1 case. These are addressed by offset from big_table, 18 18 // so take care if you move things around. 19 19 ··· 554 554 // a = 7/8 555 555 556 556 .literal8 557 - .align (1<<3) 557 + .align 3 558 558 one_256th: .quad 0x3f70000000000000 // 1/256 559 559 one: .quad 0x3ff0000000000000 560 560 one_plus_eps: .quad 0x3ff0000000000001 // 1 + ulp ··· 567 567 neglog2_2: .quad 0xbfd62e42fefa39ef // -ln(2)/2 568 568 569 569 .literal4 570 - .align (1<<2) 570 + .align 2 571 571 f256: .long 0x43800000 // 256.0f 572 572 573 573 .text ··· 577 577 #elif defined( __i386__ ) 578 578 #define RELATIVE_ADDR( _a) (_a)-atanhf_body( CX_P ) 579 579 #define INDEX %edi 580 - .align (1<<4) 580 + .align 4 581 581 atanhf_pic: 582 582 movl (%esp), %ecx // copy address of local_addr to %ecx 583 583 ret

+3 -3

src/libm/Source/Intel/cbrtf.S

··· 52 52 #include <machine/asm.h> 53 53 #include "abi.h" 54 54 55 - //.const 56 - .align (1<<4) 55 + .const 56 + .align 4 57 57 58 58 // Minimax polynomial coefficents (addressed by offset from cbrt_table) 59 59 .quad 0x4000204182c17486, 0xbff8b5b876f0d973 // b1lo b1hi ··· 261 261 denormal_bias: .quad 0x3ff0000000000000, 0x3ab0000000000000 262 262 263 263 .text 264 - .align (1<<4) 264 + .align 4 265 265 266 266 #if defined( __x86_64__ ) 267 267 #define RELATIVE_ADDR( _a ) (_a)( %rip )

+11 -13

src/libm/Source/Intel/ceil.S

··· 7 7 * Copyright 2009, Apple Inc. 8 8 */ 9 9 10 - .type ceil, @function 11 10 #ifdef __i386__ 12 11 13 12 #ifdef __SSE3__ 14 - 15 13 16 14 .text 17 - .align (1<<4) 18 - .globl ceil 19 - ceil: 15 + .align 4 16 + .globl _ceil 17 + _ceil: 20 18 mov 8(%esp), %ecx 21 19 fldl 4(%esp) 22 20 cmp $0x43300000, %ecx // if x is negative or x > 0x1.0p53 ··· 48 46 #else // i386, no SSE3 49 47 50 48 .text 51 - .align (1<<4) 52 - .globl ceil 53 - ceil: 49 + .align 4 50 + .globl _ceil 51 + _ceil: 54 52 mov 8(%esp), %ecx 55 53 fldl 4(%esp) 56 54 cmp $0x43300000, %ecx // if x is negative or x > 0x1.0p53 ··· 93 91 94 92 #else // x86_64 95 93 96 - //.const 97 - .align (1<<4) 94 + .const 95 + .align 4 98 96 one: .quad 0x3ff0000000000000 99 97 absmask:.quad 0x7fffffffffffffff 100 98 thresh: .quad 0x4330000000000000 101 99 102 100 .text 103 - .align (1<<4) 104 - .globl ceil 105 - ceil: 101 + .align 4 102 + .globl _ceil 103 + _ceil: 106 104 movd %xmm0, %rcx 107 105 andq absmask(%rip), %rcx // |x| 108 106 cmpq thresh(%rip), %rcx // if |x| >= 0x1.0p52 or isnan(x)

+12 -13

src/libm/Source/Intel/ceilf.S

··· 3 3 * 4 4 * Steve Canon, March 2009. 5 5 */ 6 - 7 - .type ceilf, @function 6 + 8 7 #if defined __i386__ 9 8 10 9 #include <System/i386/cpu_capabilities.h> 11 10 .set cpubits, _COMM_PAGE_CPU_CAPABILITIES 12 11 13 12 .text 14 - .align (1<<4) 15 - .globl ceilf 16 - ceilf: 13 + .align 4 14 + .globl _ceilf 15 + _ceilf: 17 16 movss 4(%esp), %xmm0 // load argument 18 17 testl $(kHasSSE4_1), cpubits 19 18 jz 0f ··· 24 23 flds 4(%esp) 25 24 ret 26 25 27 - .align (1<<4) 26 + .align 4 28 27 0: // no SSE 4.1 29 28 mov 4(%esp), %eax 30 29 mov $23, %cl ··· 48 47 cvttps2dq %xmm0, %xmm0 // set inexact 49 48 1: flds 4(%esp) 50 49 ret 51 - .align (1<<4) 50 + .align 4 52 51 2: cvttps2dq %xmm0, %xmm0 // set inexact 53 52 cmp $1, %edx // if x > 0, goto 3 54 53 jge 3f 55 54 andl $0x80000000, 4(%esp) // copysign(0.0, x) 56 55 flds 4(%esp) 57 56 ret 58 - .align (1<<4) 57 + .align 4 59 58 3: movl $0x3f800000, 4(%esp) // return 1.0 60 59 flds 4(%esp) 61 60 ret 62 61 63 62 #elif defined __x86_64__ 64 63 65 - //.const 66 - .align (1<<4) 64 + .const 65 + .align 4 67 66 one: .long 0x3f800000 68 67 absmask:.long 0x7fffffff 69 68 70 69 .text 71 - .align (1<<4) 72 - .globl ceilf 73 - ceilf: 70 + .align 4 71 + .globl _ceilf 72 + _ceilf: 74 73 movd %xmm0, %eax 75 74 andl absmask(%rip), %eax 76 75 movd absmask(%rip), %xmm1

+4 -6

src/libm/Source/Intel/cosh.S

··· 12 12 #include <machine/asm.h> 13 13 #include "abi.h" 14 14 15 - .type cosh, @function 16 - 17 - //.const 18 - .align (1<<4) 15 + .const 16 + .align 4 19 17 // Polynomial coefficients, offset from exp2table 20 18 .quad 0x3f55e52272e0eaec, 0x3f55e52272e0eaec // c4 21 19 .quad 0x401cc9eea1e24220, 0x401cc9eea1e24220 // c3/c4 ··· 163 161 .double 0.20918130318061568990950557203211923657510680e-8 164 162 165 163 .literal8 166 - .align (1<<3) 164 + .align 3 167 165 one_n7: .quad 0x3f80000000000000 168 166 lge_p7: .quad 0x40671547652b82fe 169 167 lge_hi: .quad 0x3ff7154760000000 ··· 181 179 #else 182 180 #define RELATIVE_ADDR(_a) (_a)-cosh_body(%ecx) 183 181 184 - .align (1<<4) 182 + .align 4 185 183 cosh_pic: 186 184 movl (%esp), %ecx 187 185 ret

+3 -5

src/libm/Source/Intel/coshf.S

··· 11 11 #include <machine/asm.h> 12 12 #include "abi.h" 13 13 14 - .type coshf, @function 15 - 16 14 // This is identical to sinhf with some indices permuted to compute cosh instead. 17 15 // Read the detailed comments in that file for more information. 18 16 19 - //.const 20 - .align (1<<4) 17 + .const 18 + .align 4 21 19 .quad 0x3f811111111110fe, 0x3fa555f78359bc34 // c5, c4 = 0.0083333..., 0.0416715 22 20 .quad 0x4034000000000018, 0x4027ff4991a5ebc5 // c3/c5 = 0.1666666.../c5, c2/c4 = 0.5/c4 23 21 coshf_table: .quad 0x0000000000000000, 0x3ff0000000000000 // sinh(0/16), cosh(0/16) ··· 220 218 #define RELATIVE_ADDR(_a) (_a)-coshf_body( %ecx ) 221 219 #define RELATIVE_ADDR_B(_a) (_a)-coshf_body_eleven( %ecx ) 222 220 223 - .align (1<<4) 221 + .align 4 224 222 coshf_pic: 225 223 movl (%esp), %ecx // Copy address of this instruction to %ecx 226 224 ret

+7 -7

src/libm/Source/Intel/e_cbrtl.S

··· 8 8 */ 9 9 10 10 #define ENTRY(name) \ 11 - .globl name; \ 12 - .align (1<<4); \ 13 - name##: 11 + .globl _##name; \ 12 + .align 4; \ 13 + _##name##: 14 14 15 15 #if defined( __LP64__ ) 16 16 #define LOCAL_STACK_SIZE 20 ··· 20 20 21 21 #include "abi.h" 22 22 23 - //.const 24 - .align (1<<4) 23 + .const 24 + .align 4 25 25 onethird: .long 0xaaaaaaab, 0xaaaaaaaa, 0x00003ffd, 0x00000000 //(long double) 1.0L/3.0L 26 26 27 - .align (1<<3) 27 + .align 3 28 28 correction: .double 0.62996052494743658238361, 0.79370052598409973737585, 1.0, 1.2599210498948731647672, 1.5874010519681994747517 29 29 coeffs: .double 1.7830491344381518, -1.5730724799776633, 1.2536000054780357, -0.60460822457398278, 0.15834924310704463, -0.017322841453552703 30 30 31 - .align (1<<2) 31 + .align 2 32 32 infinity: .single +Infinity 33 33 34 34 // Stack:

+3 -3

src/libm/Source/Intel/exp.S

··· 20 20 Tables and Constants 21 21 22 22 ****************************************************************************/ 23 - //.const 24 - .align (1<<4) 23 + .const 24 + .align 4 25 25 26 26 27 27 // Coefficients for the 4th order minimax polynomial ··· 175 175 .quad 0x3f8111116e99ac77 176 176 177 177 .literal8 178 - .align (1<<3) 178 + .align 3 179 179 lge_p7: .quad 0x40671547652b82fe 180 180 lge: .quad 0x3ff71547652b82fe 181 181 lge_hi: .quad 0x3ff7154760000000

+3 -3

src/libm/Source/Intel/exp2.S

··· 17 17 Tables and Constants 18 18 19 19 ****************************************************************************/ 20 - //.const 21 - .align (1<<4) 20 + .const 21 + .align 4 22 22 23 23 24 24 // Coefficients for the 4th order minimax polynomial ··· 176 176 177 177 178 178 .literal8 179 - .align (1<<3) 179 + .align 3 180 180 one_p7: .quad 0x4060000000000000 181 181 tiny_val: .quad 0x0010000000000001 182 182 huge_val: .quad 0x7fefffffffffffff

+3 -3

src/libm/Source/Intel/exp2f.S

··· 55 55 // 56 56 // 57 57 58 - .align (1<<4) 58 + .align 4 59 59 // 8th order minimax fit of exp2 on [-1.0,1.0]. |error| < 0.402865722354948566583852e-9: 60 60 exp2f_c: .quad 0x40bc03f30399c376, 0x3ff000000001ea2a // c4/c8 = 0.961813690023115610862381719985771e-2 / 0.134107709538786543922336536865157e-5, c0 = 1.0 + 0.278626872016317130037181614004e-10 61 61 .quad 0x408f10e7f73e6d8f, 0x3fe62e42fd0933ee // c5/c8 = 0.133318252930790403741964203236548e-2 / 0.134107709538786543922336536865157e-5, c1 = .693147176943623740308984004029708 ··· 64 64 .quad 0x3eb67fe1dc3105ba // c8 = 0.134107709538786543922336536865157e-5 65 65 66 66 67 - .align (1<<3) 67 + .align 3 68 68 exp2f_nofenv_c: .quad 0x3ff0000000058fca // 1.0 + -8.09329727503262700660348520172e-11 69 69 .double 0.693147206709644041218074094717934 70 70 .double 0.240226515050550309232521176082490 ··· 85 85 #define RELATIVE_ADDR_B( _a) (_a)-exp2f_no_fenv_body( CX_P ) 86 86 87 87 //a short routine to get the local address 88 - .align (1<<4) 88 + .align 4 89 89 exp2f_pic: movl (%esp), %ecx //copy address of local_addr to %ecx 90 90 ret 91 91 #else

+3 -3

src/libm/Source/Intel/expf.S

··· 59 59 // 60 60 // 61 61 62 - .align (1<<4) 62 + .align 4 63 63 // 8th order minimax fit of exp2 on [-1.0,1.0]. |error| < 0.402865722354948566583852e-9: 64 64 expf_c: .quad 0x40bc03f30399c376, 0x3ff000000001ea2a // c4/c8 = 0.961813690023115610862381719985771e-2 / 0.134107709538786543922336536865157e-5, c0 = 1.0 + 0.278626872016317130037181614004e-10 65 65 .quad 0x408f10e7f73e6d8f, 0x3fe62e42fd0933ee // c5/c8 = 0.133318252930790403741964203236548e-2 / 0.134107709538786543922336536865157e-5, c1 = .693147176943623740308984004029708 ··· 68 68 .quad 0x3eb67fe1dc3105ba // c8 = 0.134107709538786543922336536865157e-5 69 69 70 70 71 - .align (1<<3) 71 + .align 3 72 72 expf_nofenv_c: .quad 0x3ff0000000058fca // 1.0 + -8.09329727503262700660348520172e-11 73 73 .double 0.693147206709644041218074094717934 74 74 .double 0.240226515050550309232521176082490 ··· 90 90 #define RELATIVE_ADDR_B( _a) (_a)-expf_no_fenv_body( CX_P ) 91 91 92 92 //a short routine to get the local address 93 - .align (1<<4) 93 + .align 4 94 94 expf_pic: movl (%esp), %ecx //copy address of local_addr to %ecx 95 95 ret 96 96 #else

+2 -2

src/libm/Source/Intel/expl.S

··· 14 14 inf: .long 0x7f800000 15 15 minf: .long 0xff800000 16 16 17 - //.const 18 - .align (1<<4) 17 + .const 18 + .align 4 19 19 maxldbl: .quad 0xffffffffffffffff, 0x7ffe 20 20 minldbl: .quad 0x8000000000000000, 0x0001 21 21 ln2_hi: .quad 0xB17217F7D1D00000, 0x3ffe

+3 -3

src/libm/Source/Intel/expm1.S

··· 20 20 Tables and Constants 21 21 22 22 ****************************************************************************/ 23 - //.const 24 - .align (1<<4) 23 + .const 24 + .align 4 25 25 26 26 // Coefficients for the 4th order minimax polynomial 27 27 // approximation of exp2( ) on the interval [-0x1p-27, 0x1p-7] ··· 176 176 // Other useful constants 177 177 178 178 .literal8 179 - .align (1<<3) 179 + .align 3 180 180 lge_p7: .quad 0x40671547652b82fe 181 181 lge: .quad 0x3ff71547652b82fe 182 182 lge_hi: .quad 0x3ff7154760000000

+3 -3

src/libm/Source/Intel/expm1f.S

··· 11 11 #include <machine/asm.h> 12 12 #include "abi.h" 13 13 14 - //.const 14 + .const 15 15 16 16 //minimax polynomial for exp2(x)-1 17 - .align (1<<4) 17 + .align 4 18 18 // 8th order minimax fit of exp2 on [-1.0,1.0]. |error| < 0.402865722354948566583852e-9: 19 19 expm1f_c: .quad 0x40bc03f30399c376, 0x3dbea2a63403aaa8 // c4/c8 = 0.961813690023115610862381719985771e-2 / 0.134107709538786543922336536865157e-5, c0 = 0.278626872016317130037181614004e-10 20 20 .quad 0x408f10e7f73e6d8f, 0x3fe62e42fd0933ee // c5/c8 = 0.133318252930790403741964203236548e-2 / 0.134107709538786543922336536865157e-5, c1 = .693147176943623740308984004029708 ··· 172 172 #define RELATIVE_ADDR_B( _a) (_a)-expm1f_no_fenv_body( CX_P ) 173 173 174 174 //a short routine to get the local address 175 - .align (1<<4) 175 + .align 4 176 176 expm1f_pic: movl (%esp), %ecx //copy address of local_addr to %ecx 177 177 ret 178 178 #else

+11 -11

src/libm/Source/Intel/floor.S

··· 12 12 #ifdef __SSE3__ 13 13 14 14 .text 15 - .align (1<<4) 16 - .globl floor 17 - floor: 15 + .align 4 16 + .globl _floor 17 + _floor: 18 18 mov 8(%esp), %ecx 19 19 fldl 4(%esp) 20 20 cmp $0x43300000, %ecx // if x is negative or x > 0x1.0p53 ··· 48 48 #else // i386, no SSE3 49 49 50 50 .text 51 - .align (1<<4) 52 - .globl floor 53 - floor: 51 + .align 4 52 + .globl _floor 53 + _floor: 54 54 mov 8(%esp), %ecx 55 55 and $0x7fffffff, %ecx // high word of |x| 56 56 fldl 4(%esp) ··· 78 78 79 79 #else // x86_64 80 80 81 - //.const 82 - .align (1<<4) 81 + .const 82 + .align 4 83 83 mone: .quad 0xbff0000000000000 84 84 absmask:.quad 0x7fffffffffffffff 85 85 thresh: .quad 0x432fffffffffffff 86 86 87 87 .text 88 - .align (1<<4) 89 - .globl floor 90 - floor: 88 + .align 4 89 + .globl _floor 90 + _floor: 91 91 movd %xmm0, %rcx 92 92 andq absmask(%rip), %rcx // |x| 93 93 subq $1, %rcx

+11 -11

src/libm/Source/Intel/floorf.S

··· 10 10 .set cpubits, _COMM_PAGE_CPU_CAPABILITIES 11 11 12 12 .text 13 - .align (1<<4) 14 - .globl floorf 15 - floorf: 13 + .align 4 14 + .globl _floorf 15 + _floorf: 16 16 movss 4(%esp), %xmm0 // load argument 17 17 testl $(kHasSSE4_1), cpubits 18 18 jz 0f ··· 23 23 flds 4(%esp) 24 24 ret 25 25 26 - .align (1<<4) 26 + .align 4 27 27 0: // no SSE 4.1 28 28 mov 4(%esp), %eax 29 29 mov $23, %cl ··· 48 48 cvttps2dq %xmm0, %xmm0 // set inexact 49 49 1: flds 4(%esp) 50 50 ret 51 - .align (1<<4) 51 + .align 4 52 52 2: cvttps2dq %xmm0, %xmm0 // set inexact 53 53 cmp $0xffffffff, %edx // if x < 0.0, goto 3 54 54 jl 3f 55 55 andl $0x80000000, 4(%esp) // copysign(0.0, x) 56 56 flds 4(%esp) 57 57 ret 58 - .align (1<<4) 58 + .align 4 59 59 3: movl $0xbf800000, 4(%esp) // return 1.0 60 60 flds 4(%esp) 61 61 ret 62 62 63 63 #elif defined __x86_64__ 64 64 65 - //.const 66 - .align (1<<4) 65 + .const 66 + .align 4 67 67 mone: .long 0xbf800000 68 68 absmask:.long 0x7fffffff 69 69 70 70 .text 71 - .align (1<<4) 72 - .globl floorf 73 - floorf: 71 + .align 4 72 + .globl _floorf 73 + _floorf: 74 74 movd %xmm0, %ecx 75 75 andl $0x7fffffff, %ecx // |x| 76 76 subl $1, %ecx // subtract 1. This forces |+-0| to -0

+9 -9

src/libm/Source/Intel/fmod.S

··· 6 6 */ 7 7 8 8 .text 9 - .align (1<<4) 10 - .globl fmodf 11 - fmodf: 9 + .align 4 10 + .globl _fmodf 11 + _fmodf: 12 12 #if defined __x86_64__ 13 13 #define xf -4(%rsp) 14 14 #define yf -8(%rsp) ··· 34 34 ret 35 35 36 36 .text 37 - .align (1<<4) 38 - .globl fmod 39 - fmod: 37 + .align 4 38 + .globl _fmod 39 + _fmod: 40 40 #if defined __x86_64__ 41 41 #define x -8(%rsp) 42 42 #define y -16(%rsp) ··· 62 62 ret 63 63 64 64 .text 65 - .align (1<<4) 66 - .globl fmodl 67 - fmodl: 65 + .align 4 66 + .globl _fmodl 67 + _fmodl: 68 68 #if defined __x86_64__ 69 69 #define xl 8(%rsp) 70 70 #define yl 24(%rsp)

+11 -11

src/libm/Source/Intel/hypot.S

··· 10 10 #endif 11 11 12 12 .text 13 - .globl hypot 14 - .globl cabs 13 + .globl _hypot 14 + .globl _cabs 15 15 16 16 #if defined __i386__ 17 17 ··· 20 20 21 21 // Entry point -------------------------------------------------------- 22 22 23 - .align (1<<4) 24 - cabs: 25 - hypot: 23 + .align 4 24 + _cabs: 25 + _hypot: 26 26 mov 8(%esp), %eax 27 27 mov 16(%esp), %edx 28 28 mov ABSHI, %ecx ··· 55 55 // Special case handling ---------------------------------------------- 56 56 57 57 L_xHiIsZero: 58 - cmpl $0, 4(%esp) // is the low word of x zero? 58 + cmp $0, 4(%esp) // is the low word of x zero? 59 59 jnz L_returnFromXHiIsZero // if not, jump back to mainline 60 60 L_returnAbsY: 61 61 and %ecx, 16(%esp) ··· 63 63 ret 64 64 65 65 L_yHiIsZero: 66 - cmpl $0, 12(%esp) // is the low word of y zero? 66 + cmp $0, 12(%esp) // is the low word of y zero? 67 67 jnz L_returnFromYHiIsZero // if not, jump back to mainline 68 68 L_returnAbsX: 69 69 and %ecx, 8(%esp) ··· 73 73 L_xIsSpecial: 74 74 cmp INFHI, %edx // check if y is infinity 75 75 jnz L_returnAbsX 76 - cmpl $0, 12(%esp) 76 + cmp $0, 12(%esp) 77 77 jz L_returnAbsY 78 78 jmp L_returnAbsX 79 79 ··· 84 84 85 85 // Entry point -------------------------------------------------------- 86 86 87 - .align (1<<4) 88 - cabs: 89 - hypot: 87 + .align 4 88 + _cabs: 89 + _hypot: 90 90 movd %xmm0, %rax 91 91 movd %xmm1, %rdx 92 92 mov ABSMASK, %rcx

+9 -9

src/libm/Source/Intel/hypotf.S

··· 13 13 #define INFINITY $0x7f800000 14 14 15 15 .text 16 - .globl cabsf 17 - .globl hypotf 16 + .globl _cabsf 17 + .globl _hypotf 18 18 19 19 // Entry points ------------------------------------------------------- 20 20 21 21 #if defined __i386__ 22 - .align (1<<4) 23 - cabsf: // on i386, we can use the same code for 24 - hypotf: // hypotf and cabsf, because the arguments 22 + .align 4 23 + _cabsf: // on i386, we can use the same code for 24 + _hypotf: // hypotf and cabsf, because the arguments 25 25 mov 4(%esp), %eax // come in at the same stack offsets 26 26 mov 8(%esp), %edx // 27 27 movss 4(%esp), %xmm0 // real at esp + 4 28 28 movss 8(%esp), %xmm1 // imag at esp + 8 29 29 #else 30 - .align (1<<4) // however, on x86_64, the registers used 31 - cabsf: // are different. cabsf's arguments come 30 + .align 4 // however, on x86_64, the registers used 31 + _cabsf: // are different. cabsf's arguments come 32 32 pshufd $0xfd, %xmm0, %xmm1 // in packed in xmm0. 33 - .align (1<<4) 34 - hypotf: // 33 + .align 4 34 + _hypotf: // 35 35 movd %xmm0, %eax // hypotf, on the other hand, gets x in 36 36 movd %xmm1, %edx // xmm0 and y in xmm1. 37 37 #endif

+6 -6

src/libm/Source/Intel/hypotl.S

··· 26 26 // Entry point --------------------------------------------------------------- 27 27 28 28 .text 29 - .globl hypotl 30 - .globl cabsl 31 - .align (1<<4) 32 - hypotl: 33 - cabsl: 29 + .globl _hypotl 30 + .globl _cabsl 31 + .align 4 32 + _hypotl: 33 + _cabsl: 34 34 35 35 // Check argument scaling ---------------------------------------------------- 36 36 // ··· 262 262 // Useful constants ---------------------------------------------------------- 263 263 264 264 .literal16 265 - .align (1<<4) 265 + .align 4 266 266 L_infinity: 267 267 .quad 0x8000000000000000, 0x0000000000007fff 268 268 L_tiny:

+6 -6

src/libm/Source/Intel/log.S

··· 18 18 #include <machine/asm.h> 19 19 #include "abi.h" 20 20 21 - //.const 22 - .align (1<<5) 21 + .const 22 + .align 5 23 23 xone: .quad 0x3ff0000000000000, 0 24 24 frexp_exp_mask: .quad 0x7ff0000000000000, 0 25 25 frexp_mant_mask: .quad 0x800fffffffffffff, 0 ··· 28 28 //log10_key_mask: .quad 0x07e0000000000000, 0 29 29 log1p_not_ulp_mask: .quad 0xfffffffffffffffe, 0 30 30 31 - .align (1<<5) 31 + .align 5 32 32 lgel: .quad 0xB8AA3B295C17F0BC, 0x3fff 33 33 ln2l: .quad 0xB17217F7D1CF79AC, 0x3ffe //ln(2) rounded up to long double 34 34 ··· 37 37 38 38 c0: .quad 0xFFFFFFFFFFFFFFD7, 0xbffd //c0 = -.4999999999999999988974167423L 39 39 40 - .align (1<<5) 40 + .align 5 41 41 a01: .double .827742667285236703751556405085096, -2.00038644890076831031534988283768 //a0,a1 42 42 b01: .double 1.51843353412997067893915870795354, 1.54454569915832086827096843200102 //b0,b1 43 43 44 44 // The lookup table is in a funny format since it has 2 long double and a single. 45 45 // {10-byte va ; 2-byte pad ; 4-byte single a ; 10-byte lg1pa ; 6-byte pad} 46 - .align (1<<5) 46 + .align 5 47 47 LOOKUP: 48 48 // This is the table for a, ap1, va, lg1pa: a = (float)k*scale, ap1 = a + 1, va = (long double)1./(1.+a), lg1pa = (long double)log2(1.+a) 49 49 // In C this would be ··· 375 375 .quad 0x3ff0000000000000, 0x0000000000000000 //{0x1p+0, 0x0p+0}, k=63 376 376 377 377 .literal8 378 - .align (1<<3) 378 + .align 3 379 379 one: .double 1.0 380 380 mone: .double -1.0 381 381

+4 -4

src/libm/Source/Intel/log10f.S

··· 9 9 #include <machine/asm.h> 10 10 #include "abi.h" 11 11 12 - //.const 13 - .align (1<<4) 12 + .const 13 + .align 4 14 14 15 15 // 256 entry of Lookup table of values used for log10 calculation, generated as: 16 16 // ··· 300 300 301 301 302 302 .literal8 303 - .align (1<<3) 303 + .align 3 304 304 one: .double 1.0 305 305 onehalf: .double 0.5 306 306 onethird: .quad 0x3fd5555555555555 // 1/3 ··· 321 321 #define RELATIVE_ADDR2( _a, _i, _step) (_a)-rel_addr( CX_P, _i, _step ) 322 322 323 323 //a short routine to get the local address 324 - .align (1<<4) 324 + .align 4 325 325 log10f_pic: movl (%esp), %ecx //copy address of local_addr to %ecx 326 326 ret 327 327 #else

+4 -4

src/libm/Source/Intel/log2f.S

··· 9 9 #include <machine/asm.h> 10 10 #include "abi.h" 11 11 12 - //.const 13 - .align (1<<4) 12 + .const 13 + .align 4 14 14 15 15 // 256 entry of Lookup table of values used for log2 calculation, generated as: 16 16 // ··· 300 300 .quad 0x3fefe8e4f15bd1a0, 0x3fe0080402010080 //log2(1.99609), 1/1.99609 301 301 302 302 .literal8 303 - .align (1<<3) 303 + .align 3 304 304 one: .double 1.0 305 305 onehalf: .double 0.5 306 306 onethird: .quad 0x3fd5555555555555 // 1/3 ··· 321 321 #define RELATIVE_ADDR2( _a, _i, _step) (_a)-rel_addr( CX_P, _i, _step ) 322 322 323 323 //a short routine to get the local address 324 - .align (1<<4) 324 + .align 4 325 325 log2f_pic: movl (%esp), %ecx //copy address of local_addr to %ecx 326 326 ret 327 327 #else

+5 -5

src/libm/Source/Intel/logf.S

··· 15 15 #include <machine/asm.h> 16 16 #include "abi.h" 17 17 18 - //.const 19 - .align (1<<4) 18 + .const 19 + .align 4 20 20 21 21 // 511 entry of Lookup table of values used for log and log1p calculation, generated as: 22 22 // ··· 436 436 437 437 438 438 .literal8 439 - .align (1<<3) 439 + .align 3 440 440 one: .double 1.0 441 441 onehalf: .double 0.5 442 442 onethird: .quad 0x3fd5555555555555 // 1/3 ··· 448 448 log2: .quad 0x3fe62e42fefa39efULL // ln(2) 449 449 450 450 .literal4 451 - .align (1<<2) 451 + .align 2 452 452 f256: .long 0x43800000 //256.0f 453 453 r256: .long 0x3b800000 //1.0f/256.0f 454 454 ··· 465 465 #define RELATIVE_ADDR2( _a, _i, _step) (_a)-rel_addr( CX_P, _i, _step ) 466 466 467 467 //a short routine to get the local address 468 - .align (1<<4) 468 + .align 4 469 469 logf_pic: movl (%esp), %ecx //copy address of local_addr to %ecx 470 470 ret 471 471 #else

+3 -3

src/libm/Source/Intel/lroundl.S

··· 11 11 #include "abi.h" 12 12 #include <machine/asm.h> 13 13 14 - .align (1<<2) 14 + .align 2 15 15 .literal4 16 16 two63: .long 0x5f000000 17 17 mtwo63: .long 0xdf000000 18 18 one: .long 1 19 19 inf: .long 0x7f800000 20 20 21 - .align (1<<3) 21 + .align 3 22 22 .literal8 23 23 cutoff32: .double 2147483647.5 // 2**31-0.5 24 24 mcutoff32: .double -2147483648.5 // 2**31-0.5 25 25 26 26 27 - .align (1<<4) 27 + .align 4 28 28 .literal16 29 29 cutoff: .quad 0xffffffffffffffff, 0x403d 30 30 sign: .quad 0x0, 0xffffffffffffffff

+6 -7

src/libm/Source/Intel/machine/asm.h

··· 22 22 */ 23 23 24 24 #define ENTRY(name) \ 25 - .type name, @function; \ 26 - .globl name; \ 27 - .align (2<<4); \ 28 - name##: 25 + .globl _##name; \ 26 + .align 4; \ 27 + _##name##: 29 28 30 29 #ifndef NO_FENV 31 30 #define PRIVATE_ENTRY(name) \ 32 - .globl name; .hidden name; \ 33 - .align (2<<4); \ 34 - name##: 31 + .globl _##name; .private_extern _##name; \ 32 + .align 4; \ 33 + _##name##: 35 34 #else 36 35 #define PRIVATE_ENTRY(name) 37 36 #endif

+8 -8

src/libm/Source/Intel/modf.S

··· 25 25 #if defined __i386__ 26 26 27 27 .text 28 - .align (1<<4) 29 - .globl modf 30 - modf: 28 + .align 4 29 + .globl _modf 30 + _modf: 31 31 movl 8(%esp), %eax // high word of x 32 32 andl $0x7fffffff, %eax // high word of |x| 33 33 subl $0x3ff00000, %eax // subtract off exponent bias ··· 78 78 79 79 #else // __x86_64__ 80 80 81 - //.const 82 - .align (1<<4) 81 + .const 82 + .align 4 83 83 absmask: 84 84 .quad 0x7fffffffffffffff 85 85 one: 86 86 .quad 0x3ff0000000000000 87 87 88 88 .text 89 - .align (1<<4) 90 - .globl modf 91 - modf: 89 + .align 4 90 + .globl _modf 91 + _modf: 92 92 movsd absmask(%rip), %xmm1 93 93 andpd %xmm0, %xmm1 // |x| 94 94 movd %xmm1, %rax

+1 -1

src/libm/Source/Intel/modff.S

··· 85 85 movl 4+FRAME_SIZE( STACKP), RESULT_P 86 86 flds FRAME_SIZE( STACKP ) // { x } 87 87 fld %st(0) 88 - fstpl (RESULT_P) 88 + fstp (RESULT_P) 89 89 #else 90 90 movss %xmm0, (RESULT_P) 91 91 #endif

+4 -4

src/libm/Source/Intel/nearbyint.S

··· 5 5 // 6 6 // -- Stephen Canon, January 2010 7 7 8 - #include <i386/cpu_capabilities.h> 8 + #include <System/i386/cpu_capabilities.h> 9 9 10 10 .text 11 - .align (1<<4) 12 - .globl nearbyint 13 - nearbyint: 11 + .align 4 12 + .globl _nearbyint 13 + _nearbyint: 14 14 15 15 // Fast path if SSE4.1 is available ------------------------------------------ 16 16

+4 -4

src/libm/Source/Intel/nearbyintf.S

··· 5 5 // 6 6 // -- Stephen Canon, January 2010 7 7 8 - #include <i386/cpu_capabilities.h> 8 + #include <System/i386/cpu_capabilities.h> 9 9 10 10 .text 11 - .align (1<<4) 12 - .globl nearbyintf 13 - nearbyintf: 11 + .align 4 12 + .globl _nearbyintf 13 + _nearbyintf: 14 14 15 15 // Fast path if SSE4.1 is available ------------------------------------------ 16 16

+3 -3

src/libm/Source/Intel/nextafterl.S

··· 11 11 #include "abi.h" 12 12 13 13 #if defined( __x86_64__ ) 14 - .align (1<<3) 14 + .align 3 15 15 .literal8 16 16 signBit: .quad 0x8000000000000000 17 17 nonSignBit: .quad 0x7fffffffffffffff 18 18 #endif 19 19 20 - .align (1<<4) 21 - //.const 20 + .align 4 21 + .const 22 22 23 23 tiny: .quad 0x8000000000000000, 0x1 //0x1.0p-16382 24 24 big: .quad 0x8000000000000000, 0x7ffe //0x1.0p16383

+5 -5

src/libm/Source/Intel/powf.S

··· 16 16 #include "machine/asm.h" 17 17 #include "abi.h" 18 18 19 - //.const 19 + .const 20 20 gMaskShift: .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 21 21 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 22 22 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ··· 34 34 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31, \ 35 35 31,31,31,31, 31,31,31,31,31,31,31,31,31,31,31,31 36 36 37 - .align (1<<4) 37 + .align 4 38 38 // 8th order minimax fit of exp2 on [-1.0,1.0]. |error| < 0.402865722354948566583852e-9: 39 39 powf_exp_c: .quad 0x40bc03f30399c376, 0x3ff000000001ea2a // c4/c8 = 0.961813690023115610862381719985771e-2 / 0.134107709538786543922336536865157e-5, c0 = 1.0 + 0.278626872016317130037181614004e-10 40 40 .quad 0x408f10e7f73e6d8f, 0x3fe62e42fd0933ee // c5/c8 = 0.133318252930790403741964203236548e-2 / 0.134107709538786543922336536865157e-5, c1 = .693147176943623740308984004029708 ··· 98 98 // exact powers of two. (These are 1.0 + 1 ulp and 1.0 - 1ulp.) The other values will land somewhere in the 99 99 // range [ 1.0 - 2**-8, 1.0 + 2**-7 ]. (Experimentally verified for all floats 1.0 <= x < 2.0.) 100 100 // 101 - .align (1<<3) 101 + .align 3 102 102 powf_log2_c: .quad 0x3ff0000000000000, 0x8000000000000000 // 1, -log2l(1) 103 103 .quad 0x3fefc00000000000, 0x3f872c7ba20f7327 // 0.992188, -log2l(0.992188) 104 104 .quad 0x3fef80fe03f80fe0, 0x3f9715662c7f3dbc // 0.984496, -log2l(0.984496) ··· 659 659 jmp 9f 660 660 661 661 662 - .align (1<<4) 662 + .align 4 663 663 // return value in %xmm0 664 664 9: 665 665 pop DI_P ··· 697 697 movapd %xmm0, %xmm2 // r = x 698 698 jmp 3f 699 699 700 - .align (1<<4) 700 + .align 4 701 701 // do{ 702 702 2: mulsd %xmm0, %xmm0 // x *= x 703 703 test $1, %edx

+5 -5

src/libm/Source/Intel/rint.S

··· 6 6 // 7 7 // -- Stephen Canon, January 2010 8 8 9 - .globl rint 9 + .globl _rint 10 10 11 11 #if defined __i386__ 12 12 13 13 .text 14 - .align (1<<4) 15 - rint: 14 + .align 4 15 + _rint: 16 16 fldl 4(%esp) 17 17 frndint 18 18 ret ··· 24 24 twop52: .quad 0x4330000000000000 25 25 26 26 .text 27 - .align (1<<4) 28 - rint: 27 + .align 4 28 + _rint: 29 29 movd %xmm0, %rcx 30 30 mov %rcx, %rdx 31 31 and absmask(%rip), %rcx // |x|

+3 -3

src/libm/Source/Intel/rintf.S

··· 7 7 // -- Stephen Canon, January 2010 8 8 9 9 .text 10 - .align (1<<4) 11 - .globl rintf 12 - rintf: 10 + .align 4 11 + .globl _rintf 12 + _rintf: 13 13 #if defined __i386__ 14 14 movl 4(%esp), %ecx 15 15 movss 4(%esp), %xmm0

+3 -3

src/libm/Source/Intel/rintl.S

··· 21 21 #endif 22 22 23 23 .text 24 - .align (1<<4) 25 - .globl rintl 26 - rintl: 24 + .align 4 25 + .globl _rintl 26 + _rintl: 27 27 movswl exponent, %eax 28 28 fldt input 29 29 mov %eax, %edx

+8 -8

src/libm/Source/Intel/round.S

··· 25 25 #endif 26 26 27 27 .text 28 - .align (1<<4) 29 - .globl round 30 - round: 28 + .align 4 29 + .globl _round 30 + _round: 31 31 movl 8(%esp), %ecx 32 32 cmpl $0x43300000, %ecx 33 33 fldl 4(%esp) ··· 67 67 68 68 #else //x86_64 69 69 70 - //.const 71 - .align (1<<4) 70 + .const 71 + .align 4 72 72 one: .quad 0x3ff0000000000000 73 73 absmask:.quad 0x7fffffffffffffff 74 74 half: .quad 0x3fe0000000000000 75 75 thresh: .quad 0x4330000000000000 76 76 77 77 .text 78 - .align (1<<4) 79 - .globl round 80 - round: 78 + .align 4 79 + .globl _round 80 + _round: 81 81 movd %xmm0, %rcx 82 82 andq absmask(%rip), %rcx 83 83 movsd absmask(%rip), %xmm2

+14 -14

src/libm/Source/Intel/roundf.S

··· 7 7 #if defined __i386__ 8 8 9 9 .text 10 - .align (1<<4) 11 - .globl roundf 12 - roundf: 10 + .align 4 11 + .globl _roundf 12 + _roundf: 13 13 mov 4(%esp), %eax // load the input, x 14 14 movss 4(%esp), %xmm0 15 15 and $0x7f800000, %eax // |x| ··· 31 31 cvttps2dq %xmm0, %xmm0 // raise inexact 32 32 1: flds 4(%esp) 33 33 ret 34 - .align (1<<4) 34 + .align 4 35 35 2: je 3f // if |x| >= 0.5f, goto 3 36 36 andl $0x80000000, 4(%esp) // copysign(0.0, x) 37 37 cvttps2dq %xmm0, %xmm0 // raise inexact 38 38 flds 4(%esp) 39 39 ret 40 - .align (1<<4) 40 + .align 4 41 41 3: addl $0x00800000, 4(%esp) 42 42 andl $0xff800000, 4(%esp) // copysign(1.0, x) 43 43 cvttps2dq %xmm0, %xmm0 // raise inexact ··· 46 46 47 47 #elif defined __x86_64__ 48 48 49 - //.const 50 - .align (1<<4) 49 + .const 50 + .align 4 51 51 mzero: .long 0x80000000 52 - .align (1<<4) 52 + .align 4 53 53 expbit: .long 0x00800000 54 - .align (1<<4) 54 + .align 4 55 55 expmask:.long 0xff800000 56 56 57 57 .text 58 - .align (1<<4) 59 - .globl roundf 60 - roundf: 58 + .align 4 59 + .globl _roundf 60 + _roundf: 61 61 movd %xmm0, %eax 62 62 and $0x7f800000, %eax // |x| 63 63 mov $23, %ecx ··· 79 79 paddd %xmm2, %xmm0 // add 0.5 (ish -- non-integral bits are garbage) 80 80 pand %xmm3, %xmm0 // truncate 81 81 1: ret 82 - .align (1<<4) 82 + .align 4 83 83 2: je 3f // if |x| >= 0.5f, goto 3 84 84 cvttps2dq %xmm0, %xmm1 // raise inexact 85 85 andps mzero(%rip), %xmm0 // copysign(0.0, x) 86 86 ret 87 - .align (1<<4) 87 + .align 4 88 88 3: cvttps2dq %xmm0, %xmm1 // raise inexact 89 89 paddd expbit(%rip), %xmm0 90 90 pand expmask(%rip), %xmm0 // copysign(1.0, x)

+17 -17

src/libm/Source/Intel/scalbn.S

··· 8 8 // 9 9 // -- Stephen Canon, January 2010 10 10 11 - .globl scalbn 12 - .globl ldexp 13 - .globl scalb 14 - .globl scalbln 11 + .globl _scalbn 12 + .globl _ldexp 13 + .globl _scalb 14 + .globl _scalbln 15 15 16 16 #if defined __x86_64__ 17 17 ··· 26 26 #define clamplo mthreek(%rip) 27 27 28 28 .text 29 - .align (1<<4) 30 - scalb: 29 + .align 4 30 + _scalb: 31 31 movsd %xmm0, floatval 32 32 pcmpeqd %xmm0, %xmm0 33 33 fldl floatval // load x on x87 ··· 45 45 movapd %xmm1, %xmm0 // since n is a NaN, we just return n 46 46 ret 47 47 48 - .align (1<<4) 49 - scalbln: 48 + .align 4 49 + _scalbln: 50 50 movsd %xmm0, floatval 51 51 pcmpeqd %xmm0, %xmm0 52 52 fldl floatval // load x on x87 ··· 59 59 cmovl %rdx, ln // n == -3000 60 60 jmp L_common 61 61 62 - .align (1<<4) 63 - scalbn: 64 - ldexp: 62 + .align 4 63 + _scalbn: 64 + _ldexp: 65 65 movsd %xmm0, floatval 66 66 fldl floatval // load x on x87 67 67 pcmpeqd %xmm0, %xmm0 ··· 97 97 #define clamplo (mthreek-0b)(%ecx) 98 98 99 99 .text 100 - .align (1<<4) 101 - scalb: 100 + .align 4 101 + _scalb: 102 102 call 0f 103 103 0: pop %ecx // conjure pic info 104 104 movsd 12(%esp), %xmm1 // load n ··· 118 118 fldl 12(%esp) // n is nan, return n 119 119 ret 120 120 121 - .align (1<<4) 122 - scalbln: 123 - scalbn: 124 - ldexp: 121 + .align 4 122 + _scalbln: 123 + _scalbn: 124 + _ldexp: 125 125 mov 12(%esp), n // load n 126 126 fldl floatval // load x on x87 127 127 fld1

+12 -12

src/libm/Source/Intel/scalbnf.S

··· 8 8 9 9 10 10 .text 11 - .globl scalblnf 12 - .globl scalbnf 13 - .globl ldexpf 11 + .globl _scalblnf 12 + .globl _scalbnf 13 + .globl _ldexpf 14 14 15 15 #if defined __x86_64__ 16 16 17 17 #define n %edi 18 18 #define ln %rdi 19 19 20 - .align (1<<4) 21 - scalblnf: 20 + .align 4 21 + _scalblnf: 22 22 cvtss2sd %xmm0, %xmm0 // (double)x 23 23 mov $300, %rdx 24 24 cmp %rdx, ln // if n > 300 ··· 28 28 cmovl %rdx, ln // n == -300 29 29 jmp L_common 30 30 31 - .align (1<<4) 32 - scalbnf: 33 - ldexpf: 31 + .align 4 32 + _scalbnf: 33 + _ldexpf: 34 34 cvtss2sd %xmm0, %xmm0 // (double)x 35 35 mov $300, %edx 36 36 cmp %edx, n // if n > 300 ··· 50 50 51 51 #define n %eax 52 52 53 - .align (1<<4) 54 - scalblnf: 55 - scalbnf: 56 - ldexpf: 53 + .align 4 54 + _scalblnf: 55 + _scalbnf: 56 + _ldexpf: 57 57 cvtss2sd 4(%esp), %xmm0 // (double)x 58 58 mov 8(%esp), n 59 59 mov $300, %edx

+12 -12

src/libm/Source/Intel/scalbnl.S

··· 8 8 9 9 10 10 .text 11 - .globl scalblnl 12 - .globl scalbnl 13 - .globl ldexpl 11 + .globl _scalblnl 12 + .globl _scalbnl 13 + .globl _ldexpl 14 14 15 15 #if defined __x86_64__ 16 16 ··· 20 20 #define ln %rdi 21 21 #define nw %di 22 22 23 - .align (1<<4) 24 - scalblnl: 23 + .align 4 24 + _scalblnl: 25 25 fldt 8(%rsp) // load x 26 26 fld1 27 27 fstpt floatval // conjure 1.0L ··· 32 32 jl L_bigNegativeN // carefully. 33 33 jmp L_scaleX 34 34 35 - .align (1<<4) 36 - scalbnl: 37 - ldexpl: 35 + .align 4 36 + _scalbnl: 37 + _ldexpl: 38 38 fldt 8(%rsp) // load x 39 39 40 40 #elif defined __i386__ ··· 44 44 #define n %eax 45 45 #define nw %ax 46 46 47 - .align (1<<4) 48 - scalblnl: 49 - scalbnl: 50 - ldexpl: 47 + .align 4 48 + _scalblnl: 49 + _scalbnl: 50 + _ldexpl: 51 51 mov 20(%esp), n // load n 52 52 fldt floatval // load x 53 53

+9 -9

src/libm/Source/Intel/sinfcosf.S

··· 9 9 10 10 .literal8 11 11 12 - .align (1<<3) 12 + .align 3 13 13 14 14 Half: .double .5 15 15 16 16 17 - //.const 17 + .const 18 18 19 - .align (1<<4) 19 + .align 4 20 20 21 21 /* Coefficients to calculate x*(x**4+c0*x**2+c1) * c4*(x**4+c2*x**2+c3). 22 22 ··· 154 154 Exhaustive testing proved this routine returns faithfully rounded 155 155 results. 156 156 */ 157 - .align (1<<5) 158 - .globl sinf 159 - sinf: 157 + .align 5 158 + .globl _sinf 159 + _sinf: 160 160 161 161 // Put x into an integer register. 162 162 #if defined __i386__ ··· 361 361 Exhaustive testing proved this routine returns faithfully rounded 362 362 results. 363 363 */ 364 - .align (1<<5) 365 - .globl cosf 366 - cosf: 364 + .align 5 365 + .globl _cosf 366 + _cosf: 367 367 // This code is identical to _sinf except for the addition of Half. 368 368 369 369 // Put x into an integer register.

+4 -4

src/libm/Source/Intel/sinh.S

··· 12 12 #include <machine/asm.h> 13 13 #include "abi.h" 14 14 15 - //.const 16 - .align (1<<4) 15 + .const 16 + .align 4 17 17 // Polynomial coefficients, offset from exp2table 18 18 .quad 0x3f55e52272e0eaec, 0x3f55e52272e0eaec // c4 19 19 .quad 0x401cc9eea1e24220, 0x401cc9eea1e24220 // c3/c4 ··· 161 161 .quad 0x3de61bfa2e91919a 162 162 163 163 .literal8 164 - .align (1<<3) 164 + .align 3 165 165 one_n7: .quad 0x3f80000000000000 166 166 lge_p7: .quad 0x40671547652b82fe 167 167 lge_hi: .quad 0x3ff7154760000000 ··· 178 178 #else 179 179 #define RELATIVE_ADDR(_a) (_a)-sinh_body(%ecx) 180 180 181 - .align (1<<4) 181 + .align 4 182 182 sinh_pic: 183 183 movl (%esp), %ecx 184 184 ret

+3 -3

src/libm/Source/Intel/sinhf.S

··· 47 47 // For the "uninteresting range", the code is taken directly from expf. Read the 48 48 // comments in that file for more information. 49 49 50 - //.const 50 + .const 51 51 52 - .align (1<<4) 52 + .align 4 53 53 .quad 0x3f811111111110fe, 0x3fa555f78359bc34 // c5, c4 = 0.0083333..., 0.0416715 54 54 .quad 0x4034000000000018, 0x4027ff4991a5ebc5 // c3/c5 = 0.1666666.../c5, c2/c4 = 0.5/c4 55 55 .quad 0x0000000000000000, 0x3ff0000000000000 // 0, 1 ··· 252 252 #define RELATIVE_ADDR(_a) (_a)-sinhf_body( %ecx ) 253 253 #define RELATIVE_ADDR_B(_a) (_a)-sinhf_body_eleven( %ecx ) 254 254 255 - .align (1<<4) 255 + .align 4 256 256 sinhf_pic: 257 257 movl (%esp), %ecx // Copy address of this instruction to %ecx 258 258 ret

+6 -6

src/libm/Source/Intel/tanf.S

··· 9 9 10 10 .literal8 11 11 12 - .align (1<<3) 12 + .align 3 13 13 14 14 NegativeOne: 15 15 .double -1 16 16 17 17 18 - //.const 18 + .const 19 19 20 - .align (1<<4) 20 + .align 4 21 21 22 22 /* Coefficients to calculate: 23 23 ··· 167 167 Exhaustive testing proved this routine returns faithfully rounded 168 168 results. 169 169 */ 170 - .align (1<<5) 171 - .globl tanf 172 - tanf: 170 + .align 5 171 + .globl _tanf 172 + _tanf: 173 173 174 174 // Put x into an integer register. 175 175 #if defined __i386__

+3 -3

src/libm/Source/Intel/tanh.S

··· 17 17 Tables and Constants 18 18 19 19 ****************************************************************************/ 20 - //.const 21 - .align (1<<4) 20 + .const 21 + .align 4 22 22 abs_mask: .quad 0x7fffffffffffffff, 0x0000000000000000 23 23 four_exp: .quad 0x0020000000000000, 0x0000000000000000 24 24 one: .quad 0x3ff0000000000000, 0x0000000000000000 ··· 324 324 .quad 0xbf29111400a41fbf, 0x0000000000000000 // c 325 325 326 326 .literal8 327 - .align (1<<3) 327 + .align 3 328 328 m2_lge_p7: .quad 0xc0771547652b82fe 329 329 m2_lge: .quad 0xc0071547652b82fe 330 330 m2_lge_hi: .quad 0xc007154760000000

+3 -5

src/libm/Source/Intel/tanhf.S

··· 11 11 #include <machine/asm.h> 12 12 #include "abi.h" 13 13 14 - .type tanhf, @function 15 - 16 14 // This is identical to sinhf with some indices permuted to compute tanh instead. 17 15 // Read the detailed comments in that file for more information. 18 16 19 - //.const 20 - .align (1<<4) 17 + .const 18 + .align 4 21 19 .quad 0x3f811111111110fe, 0x3fa555f78359bc34 // c5, c4 = 0.0083333..., 0.0416715 22 20 .quad 0x4034000000000018, 0x4027ff4991a5ebc5 // c3/c5 = 0.1666666.../c5, c2/c4 = 0.5/c4 23 21 tanhf_table: .quad 0x0000000000000000, 0x3ff0000000000000 // sinh(0/16), cosh(0/16) ··· 210 208 #define RELATIVE_ADDR(_a) (_a)-tanhf_body( %ecx ) 211 209 #define RELATIVE_ADDR_B(_a) (_a)-tanhf_body_eleven( %ecx ) 212 210 213 - .align (1<<4) 211 + .align 4 214 212 tanhf_pic: 215 213 movl (%esp), %ecx // Copy address of this instruction to %ecx 216 214 ret

+8 -8

src/libm/Source/Intel/trunc.S

··· 25 25 #endif 26 26 27 27 .text 28 - .align (1<<4) 29 - .globl trunc 30 - trunc: 28 + .align 4 29 + .globl _trunc 30 + _trunc: 31 31 movl 8(%esp), %ecx 32 32 cmpl $0x43300000, %ecx 33 33 fldl 4(%esp) ··· 47 47 48 48 #else //x86_64 49 49 50 - //.const 51 - .align (1<<4) 50 + .const 51 + .align 4 52 52 one: .quad 0x3ff0000000000000 53 53 absmask:.quad 0x7fffffffffffffff 54 54 half: .quad 0x3fe0000000000000 55 55 thresh: .quad 0x4330000000000000 56 56 57 57 .text 58 - .align (1<<4) 59 - .globl trunc 60 - trunc: 58 + .align 4 59 + .globl _trunc 60 + _trunc: 61 61 movd %xmm0, %rcx 62 62 andq absmask(%rip), %rcx 63 63 movsd absmask(%rip), %xmm2

+11 -11

src/libm/Source/Intel/truncf.S

··· 12 12 #if defined __i386__ 13 13 14 14 .text 15 - .align (1<<4) 16 - .globl truncf 17 - truncf: 15 + .align 4 16 + .globl _truncf 17 + _truncf: 18 18 movss 4(%esp), %xmm0 // load argument 19 19 testl $(kHasSSE4_1), cpubits 20 20 jz 0f ··· 24 24 flds 4(%esp) // 25 25 ret 26 26 27 - .align (1<<4) 27 + .align 4 28 28 0: mov 4(%esp), %eax // load the input, x 29 29 and $0x7f800000, %eax // |x| 30 30 mov $23, %ecx ··· 40 40 1: flds 4(%esp) 41 41 ret 42 42 43 - .align (1<<4) 43 + .align 4 44 44 2: // Handle |x| < 1.0 here. 45 45 andl $0x80000000, 4(%esp) // copysign(0.0, x) 46 46 cvttps2dq %xmm0, %xmm0 // raise inexact ··· 49 49 50 50 #elif defined __x86_64__ 51 51 52 - //.const 53 - .align (1<<4) 52 + .const 53 + .align 4 54 54 mzero: .long 0x80000000 55 55 56 56 .text 57 - .align (1<<4) 58 - .globl truncf 59 - truncf: 57 + .align 4 58 + .globl _truncf 59 + _truncf: 60 60 movd %xmm0, %eax 61 61 and $0x7f800000, %eax // |x| 62 62 mov $23, %ecx ··· 71 71 cvttps2dq %xmm0, %xmm1 // raise inexact 72 72 andps %xmm2, %xmm0 // mask off non-integral bits 73 73 1: ret 74 - .align (1<<4) 74 + .align 4 75 75 2: cvttps2dq %xmm0, %xmm1 // raise inexact 76 76 andps mzero(%rip), %xmm0 // copysign(0.0, x) 77 77 ret

Configure Feed

Configure Feed