Masking glibc symbols for better portability (#4180)

* Masking glibc symbols for better portability

* Remove redundant files
This commit is contained in:
Amos Bird
2021-03-05 13:15:55 +08:00
committed by GitHub
parent c95f00d508
commit d6ac8f4e35
80 changed files with 6676 additions and 3 deletions

View File

@ -501,8 +501,11 @@ else()
message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}")
endif()
add_subdirectory(${SRC_DIR}/glibc-compatibility)
set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
-lrt -lbfd -liberty -lc -lm -ldl -pthread
-lrt -l:libbfd.a -liberty -lc -lm -ldl -pthread
glibc-compatibility
)
# Set libraries for test

View File

@ -0,0 +1,55 @@
enable_language(ASM)
include(CheckIncludeFile)
check_include_file("sys/random.h" HAVE_SYS_RANDOM_H)
if(COMPILER_CLANG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-builtin-requires-header")
endif()
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.12")
macro(add_glob cur_list)
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS ${ARGN})
list(APPEND ${cur_list} ${__tmp})
endmacro()
else ()
macro(add_glob cur_list)
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN})
list(APPEND ${cur_list} ${__tmp})
endmacro()
endif ()
macro(add_headers_and_sources prefix common_path)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
endmacro()
macro(add_headers_only prefix common_path)
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
endmacro()
add_headers_and_sources(glibc_compatibility .)
add_headers_and_sources(glibc_compatibility musl)
list (APPEND glibc_compatibility_sources musl/x86_64/syscall.s musl/x86_64/longjmp.s FastMemcpy.c)
set (musl_arch_include_dir musl/x86_64)
list(REMOVE_ITEM glibc_compatibility_sources musl/getentropy.c)
if(HAVE_SYS_RANDOM_H)
list(APPEND glibc_compatibility_sources musl/getentropy.c)
endif()
# Need to omit frame pointers to match the performance of glibc
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer")
add_library(glibc-compatibility STATIC ${glibc_compatibility_sources})
if (COMPILER_CLANG)
target_compile_options(glibc-compatibility PRIVATE -Wno-unused-command-line-argument)
elseif (COMPILER_GCC)
target_compile_options(glibc-compatibility PRIVATE -Wno-unused-but-set-variable)
endif ()
target_include_directories(glibc-compatibility PRIVATE ${musl_arch_include_dir})
message (STATUS "Some symbols from glibc will be replaced for compatibility")

View File

@ -0,0 +1,220 @@
//=====================================================================
//
// FastMemcpy.c - skywind3000@163.com, 2015
//
// feature:
// 50% speed up in avg. vs standard memcpy (tested in vc2012/gcc4.9)
//
//=====================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#if (defined(_WIN32) || defined(WIN32))
#include <windows.h>
#include <mmsystem.h>
#ifdef _MSC_VER
#pragma comment(lib, "winmm.lib")
#endif
#elif defined(__unix)
#include <sys/time.h>
#include <unistd.h>
#else
#error it can only be compiled under windows or unix
#endif
#include "FastMemcpy.h"
unsigned int gettime()
{
#if (defined(_WIN32) || defined(WIN32))
return timeGetTime();
#else
static struct timezone tz={ 0,0 };
struct timeval time;
gettimeofday(&time,&tz);
return (time.tv_sec * 1000 + time.tv_usec / 1000);
#endif
}
void sleepms(unsigned int millisec)
{
#if defined(_WIN32) || defined(WIN32)
Sleep(millisec);
#else
usleep(millisec * 1000);
#endif
}
void benchmark(int dstalign, int srcalign, size_t size, int times)
{
char *DATA1 = (char*)malloc(size + 64);
char *DATA2 = (char*)malloc(size + 64);
size_t LINEAR1 = ((size_t)DATA1);
size_t LINEAR2 = ((size_t)DATA2);
char *ALIGN1 = (char*)(((64 - (LINEAR1 & 63)) & 63) + LINEAR1);
char *ALIGN2 = (char*)(((64 - (LINEAR2 & 63)) & 63) + LINEAR2);
char *dst = (dstalign)? ALIGN1 : (ALIGN1 + 1);
char *src = (srcalign)? ALIGN2 : (ALIGN2 + 3);
unsigned int t1, t2;
int k;
sleepms(100);
t1 = gettime();
for (k = times; k > 0; k--) {
memcpy(dst, src, size);
}
t1 = gettime() - t1;
sleepms(100);
t2 = gettime();
for (k = times; k > 0; k--) {
memcpy_fast(dst, src, size);
}
t2 = gettime() - t2;
free(DATA1);
free(DATA2);
printf("result(dst %s, src %s): memcpy_fast=%dms memcpy=%d ms\n",
dstalign? "aligned" : "unalign",
srcalign? "aligned" : "unalign", (int)t2, (int)t1);
}
void bench(int copysize, int times)
{
printf("benchmark(size=%d bytes, times=%d):\n", copysize, times);
benchmark(1, 1, copysize, times);
benchmark(1, 0, copysize, times);
benchmark(0, 1, copysize, times);
benchmark(0, 0, copysize, times);
printf("\n");
}
void random_bench(int maxsize, int times)
{
static char A[11 * 1024 * 1024 + 2];
static char B[11 * 1024 * 1024 + 2];
static int random_offsets[0x10000];
static int random_sizes[0x8000];
unsigned int i, p1, p2;
unsigned int t1, t2;
for (i = 0; i < 0x10000; i++) { // generate random offsets
random_offsets[i] = rand() % (10 * 1024 * 1024 + 1);
}
for (i = 0; i < 0x8000; i++) { // generate random sizes
random_sizes[i] = 1 + rand() % maxsize;
}
sleepms(100);
t1 = gettime();
for (p1 = 0, p2 = 0, i = 0; i < times; i++) {
int offset1 = random_offsets[(p1++) & 0xffff];
int offset2 = random_offsets[(p1++) & 0xffff];
int size = random_sizes[(p2++) & 0x7fff];
memcpy(A + offset1, B + offset2, size);
}
t1 = gettime() - t1;
sleepms(100);
t2 = gettime();
for (p1 = 0, p2 = 0, i = 0; i < times; i++) {
int offset1 = random_offsets[(p1++) & 0xffff];
int offset2 = random_offsets[(p1++) & 0xffff];
int size = random_sizes[(p2++) & 0x7fff];
memcpy_fast(A + offset1, B + offset2, size);
}
t2 = gettime() - t2;
printf("benchmark random access:\n");
printf("memcpy_fast=%dms memcpy=%dms\n\n", (int)t2, (int)t1);
}
#ifdef _MSC_VER
#pragma comment(lib, "winmm.lib")
#endif
int main(void)
{
bench(32, 0x1000000);
bench(64, 0x1000000);
bench(512, 0x800000);
bench(1024, 0x400000);
bench(4096, 0x80000);
bench(8192, 0x40000);
bench(1024 * 1024 * 1, 0x800);
bench(1024 * 1024 * 4, 0x200);
bench(1024 * 1024 * 8, 0x100);
random_bench(2048, 8000000);
return 0;
}
/*
benchmark(size=32 bytes, times=16777216):
result(dst aligned, src aligned): memcpy_fast=78ms memcpy=260 ms
result(dst aligned, src unalign): memcpy_fast=78ms memcpy=250 ms
result(dst unalign, src aligned): memcpy_fast=78ms memcpy=266 ms
result(dst unalign, src unalign): memcpy_fast=78ms memcpy=234 ms
benchmark(size=64 bytes, times=16777216):
result(dst aligned, src aligned): memcpy_fast=109ms memcpy=281 ms
result(dst aligned, src unalign): memcpy_fast=109ms memcpy=328 ms
result(dst unalign, src aligned): memcpy_fast=109ms memcpy=343 ms
result(dst unalign, src unalign): memcpy_fast=93ms memcpy=344 ms
benchmark(size=512 bytes, times=8388608):
result(dst aligned, src aligned): memcpy_fast=125ms memcpy=218 ms
result(dst aligned, src unalign): memcpy_fast=156ms memcpy=484 ms
result(dst unalign, src aligned): memcpy_fast=172ms memcpy=546 ms
result(dst unalign, src unalign): memcpy_fast=172ms memcpy=515 ms
benchmark(size=1024 bytes, times=4194304):
result(dst aligned, src aligned): memcpy_fast=109ms memcpy=172 ms
result(dst aligned, src unalign): memcpy_fast=187ms memcpy=453 ms
result(dst unalign, src aligned): memcpy_fast=172ms memcpy=437 ms
result(dst unalign, src unalign): memcpy_fast=156ms memcpy=452 ms
benchmark(size=4096 bytes, times=524288):
result(dst aligned, src aligned): memcpy_fast=62ms memcpy=78 ms
result(dst aligned, src unalign): memcpy_fast=109ms memcpy=202 ms
result(dst unalign, src aligned): memcpy_fast=94ms memcpy=203 ms
result(dst unalign, src unalign): memcpy_fast=110ms memcpy=218 ms
benchmark(size=8192 bytes, times=262144):
result(dst aligned, src aligned): memcpy_fast=62ms memcpy=78 ms
result(dst aligned, src unalign): memcpy_fast=78ms memcpy=202 ms
result(dst unalign, src aligned): memcpy_fast=78ms memcpy=203 ms
result(dst unalign, src unalign): memcpy_fast=94ms memcpy=203 ms
benchmark(size=1048576 bytes, times=2048):
result(dst aligned, src aligned): memcpy_fast=203ms memcpy=191 ms
result(dst aligned, src unalign): memcpy_fast=219ms memcpy=281 ms
result(dst unalign, src aligned): memcpy_fast=218ms memcpy=328 ms
result(dst unalign, src unalign): memcpy_fast=218ms memcpy=312 ms
benchmark(size=4194304 bytes, times=512):
result(dst aligned, src aligned): memcpy_fast=312ms memcpy=406 ms
result(dst aligned, src unalign): memcpy_fast=296ms memcpy=421 ms
result(dst unalign, src aligned): memcpy_fast=312ms memcpy=468 ms
result(dst unalign, src unalign): memcpy_fast=297ms memcpy=452 ms
benchmark(size=8388608 bytes, times=256):
result(dst aligned, src aligned): memcpy_fast=281ms memcpy=452 ms
result(dst aligned, src unalign): memcpy_fast=280ms memcpy=468 ms
result(dst unalign, src aligned): memcpy_fast=298ms memcpy=514 ms
result(dst unalign, src unalign): memcpy_fast=344ms memcpy=472 ms
benchmark random access:
memcpy_fast=515ms memcpy=1014ms
*/

View File

@ -0,0 +1,694 @@
//=====================================================================
//
// FastMemcpy.c - skywind3000@163.com, 2015
//
// feature:
// 50% speed up in avg. vs standard memcpy (tested in vc2012/gcc5.1)
//
//=====================================================================
#ifndef __FAST_MEMCPY_H__
#define __FAST_MEMCPY_H__
#include <stddef.h>
#include <stdint.h>
#include <emmintrin.h>
//---------------------------------------------------------------------
// force inline for compilers
//---------------------------------------------------------------------
#ifndef INLINE
#ifdef __GNUC__
#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
#define INLINE __inline__ __attribute__((always_inline))
#else
#define INLINE __inline__
#endif
#elif defined(_MSC_VER)
#define INLINE __forceinline
#elif (defined(__BORLANDC__) || defined(__WATCOMC__))
#define INLINE __inline
#else
#define INLINE
#endif
#endif
typedef __attribute__((__aligned__(1))) uint16_t uint16_unaligned_t;
typedef __attribute__((__aligned__(1))) uint32_t uint32_unaligned_t;
typedef __attribute__((__aligned__(1))) uint64_t uint64_unaligned_t;
//---------------------------------------------------------------------
// fast copy for different sizes
//---------------------------------------------------------------------
static INLINE void memcpy_sse2_16(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
}
static INLINE void memcpy_sse2_32(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
}
static INLINE void memcpy_sse2_64(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
__m128i m2 = _mm_loadu_si128(((const __m128i*)src) + 2);
__m128i m3 = _mm_loadu_si128(((const __m128i*)src) + 3);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
_mm_storeu_si128(((__m128i*)dst) + 2, m2);
_mm_storeu_si128(((__m128i*)dst) + 3, m3);
}
static INLINE void memcpy_sse2_128(void *dst, const void *src) {
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
__m128i m2 = _mm_loadu_si128(((const __m128i*)src) + 2);
__m128i m3 = _mm_loadu_si128(((const __m128i*)src) + 3);
__m128i m4 = _mm_loadu_si128(((const __m128i*)src) + 4);
__m128i m5 = _mm_loadu_si128(((const __m128i*)src) + 5);
__m128i m6 = _mm_loadu_si128(((const __m128i*)src) + 6);
__m128i m7 = _mm_loadu_si128(((const __m128i*)src) + 7);
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
_mm_storeu_si128(((__m128i*)dst) + 2, m2);
_mm_storeu_si128(((__m128i*)dst) + 3, m3);
_mm_storeu_si128(((__m128i*)dst) + 4, m4);
_mm_storeu_si128(((__m128i*)dst) + 5, m5);
_mm_storeu_si128(((__m128i*)dst) + 6, m6);
_mm_storeu_si128(((__m128i*)dst) + 7, m7);
}
//---------------------------------------------------------------------
// tiny memory copy with jump table optimized
//---------------------------------------------------------------------
/// Attribute is used to avoid an error with undefined behaviour sanitizer
/// ../contrib/FastMemcpy/FastMemcpy.h:91:56: runtime error: applying zero offset to null pointer
/// Found by 01307_orc_output_format.sh, cause - ORCBlockInputFormat and external ORC library.
__attribute__((__no_sanitize__("undefined"))) static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) {
unsigned char *dd = ((unsigned char*)dst) + size;
const unsigned char *ss = ((const unsigned char*)src) + size;
switch (size) {
case 64:
memcpy_sse2_64(dd - 64, ss - 64);
case 0:
break;
case 65:
memcpy_sse2_64(dd - 65, ss - 65);
case 1:
dd[-1] = ss[-1];
break;
case 66:
memcpy_sse2_64(dd - 66, ss - 66);
case 2:
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 67:
memcpy_sse2_64(dd - 67, ss - 67);
case 3:
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 68:
memcpy_sse2_64(dd - 68, ss - 68);
case 4:
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 69:
memcpy_sse2_64(dd - 69, ss - 69);
case 5:
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 70:
memcpy_sse2_64(dd - 70, ss - 70);
case 6:
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 71:
memcpy_sse2_64(dd - 71, ss - 71);
case 7:
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 72:
memcpy_sse2_64(dd - 72, ss - 72);
case 8:
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
break;
case 73:
memcpy_sse2_64(dd - 73, ss - 73);
case 9:
*((uint64_unaligned_t*)(dd - 9)) = *((uint64_unaligned_t*)(ss - 9));
dd[-1] = ss[-1];
break;
case 74:
memcpy_sse2_64(dd - 74, ss - 74);
case 10:
*((uint64_unaligned_t*)(dd - 10)) = *((uint64_unaligned_t*)(ss - 10));
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 75:
memcpy_sse2_64(dd - 75, ss - 75);
case 11:
*((uint64_unaligned_t*)(dd - 11)) = *((uint64_unaligned_t*)(ss - 11));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 76:
memcpy_sse2_64(dd - 76, ss - 76);
case 12:
*((uint64_unaligned_t*)(dd - 12)) = *((uint64_unaligned_t*)(ss - 12));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 77:
memcpy_sse2_64(dd - 77, ss - 77);
case 13:
*((uint64_unaligned_t*)(dd - 13)) = *((uint64_unaligned_t*)(ss - 13));
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 78:
memcpy_sse2_64(dd - 78, ss - 78);
case 14:
*((uint64_unaligned_t*)(dd - 14)) = *((uint64_unaligned_t*)(ss - 14));
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
break;
case 79:
memcpy_sse2_64(dd - 79, ss - 79);
case 15:
*((uint64_unaligned_t*)(dd - 15)) = *((uint64_unaligned_t*)(ss - 15));
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
break;
case 80:
memcpy_sse2_64(dd - 80, ss - 80);
case 16:
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 81:
memcpy_sse2_64(dd - 81, ss - 81);
case 17:
memcpy_sse2_16(dd - 17, ss - 17);
dd[-1] = ss[-1];
break;
case 82:
memcpy_sse2_64(dd - 82, ss - 82);
case 18:
memcpy_sse2_16(dd - 18, ss - 18);
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 83:
memcpy_sse2_64(dd - 83, ss - 83);
case 19:
memcpy_sse2_16(dd - 19, ss - 19);
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 84:
memcpy_sse2_64(dd - 84, ss - 84);
case 20:
memcpy_sse2_16(dd - 20, ss - 20);
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 85:
memcpy_sse2_64(dd - 85, ss - 85);
case 21:
memcpy_sse2_16(dd - 21, ss - 21);
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 86:
memcpy_sse2_64(dd - 86, ss - 86);
case 22:
memcpy_sse2_16(dd - 22, ss - 22);
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 87:
memcpy_sse2_64(dd - 87, ss - 87);
case 23:
memcpy_sse2_16(dd - 23, ss - 23);
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 88:
memcpy_sse2_64(dd - 88, ss - 88);
case 24:
memcpy_sse2_16(dd - 24, ss - 24);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 89:
memcpy_sse2_64(dd - 89, ss - 89);
case 25:
memcpy_sse2_16(dd - 25, ss - 25);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 90:
memcpy_sse2_64(dd - 90, ss - 90);
case 26:
memcpy_sse2_16(dd - 26, ss - 26);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 91:
memcpy_sse2_64(dd - 91, ss - 91);
case 27:
memcpy_sse2_16(dd - 27, ss - 27);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 92:
memcpy_sse2_64(dd - 92, ss - 92);
case 28:
memcpy_sse2_16(dd - 28, ss - 28);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 93:
memcpy_sse2_64(dd - 93, ss - 93);
case 29:
memcpy_sse2_16(dd - 29, ss - 29);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 94:
memcpy_sse2_64(dd - 94, ss - 94);
case 30:
memcpy_sse2_16(dd - 30, ss - 30);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 95:
memcpy_sse2_64(dd - 95, ss - 95);
case 31:
memcpy_sse2_16(dd - 31, ss - 31);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 96:
memcpy_sse2_64(dd - 96, ss - 96);
case 32:
memcpy_sse2_32(dd - 32, ss - 32);
break;
case 97:
memcpy_sse2_64(dd - 97, ss - 97);
case 33:
memcpy_sse2_32(dd - 33, ss - 33);
dd[-1] = ss[-1];
break;
case 98:
memcpy_sse2_64(dd - 98, ss - 98);
case 34:
memcpy_sse2_32(dd - 34, ss - 34);
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 99:
memcpy_sse2_64(dd - 99, ss - 99);
case 35:
memcpy_sse2_32(dd - 35, ss - 35);
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 100:
memcpy_sse2_64(dd - 100, ss - 100);
case 36:
memcpy_sse2_32(dd - 36, ss - 36);
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 101:
memcpy_sse2_64(dd - 101, ss - 101);
case 37:
memcpy_sse2_32(dd - 37, ss - 37);
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 102:
memcpy_sse2_64(dd - 102, ss - 102);
case 38:
memcpy_sse2_32(dd - 38, ss - 38);
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 103:
memcpy_sse2_64(dd - 103, ss - 103);
case 39:
memcpy_sse2_32(dd - 39, ss - 39);
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 104:
memcpy_sse2_64(dd - 104, ss - 104);
case 40:
memcpy_sse2_32(dd - 40, ss - 40);
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
break;
case 105:
memcpy_sse2_64(dd - 105, ss - 105);
case 41:
memcpy_sse2_32(dd - 41, ss - 41);
*((uint64_unaligned_t*)(dd - 9)) = *((uint64_unaligned_t*)(ss - 9));
dd[-1] = ss[-1];
break;
case 106:
memcpy_sse2_64(dd - 106, ss - 106);
case 42:
memcpy_sse2_32(dd - 42, ss - 42);
*((uint64_unaligned_t*)(dd - 10)) = *((uint64_unaligned_t*)(ss - 10));
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 107:
memcpy_sse2_64(dd - 107, ss - 107);
case 43:
memcpy_sse2_32(dd - 43, ss - 43);
*((uint64_unaligned_t*)(dd - 11)) = *((uint64_unaligned_t*)(ss - 11));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 108:
memcpy_sse2_64(dd - 108, ss - 108);
case 44:
memcpy_sse2_32(dd - 44, ss - 44);
*((uint64_unaligned_t*)(dd - 12)) = *((uint64_unaligned_t*)(ss - 12));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 109:
memcpy_sse2_64(dd - 109, ss - 109);
case 45:
memcpy_sse2_32(dd - 45, ss - 45);
*((uint64_unaligned_t*)(dd - 13)) = *((uint64_unaligned_t*)(ss - 13));
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 110:
memcpy_sse2_64(dd - 110, ss - 110);
case 46:
memcpy_sse2_32(dd - 46, ss - 46);
*((uint64_unaligned_t*)(dd - 14)) = *((uint64_unaligned_t*)(ss - 14));
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
break;
case 111:
memcpy_sse2_64(dd - 111, ss - 111);
case 47:
memcpy_sse2_32(dd - 47, ss - 47);
*((uint64_unaligned_t*)(dd - 15)) = *((uint64_unaligned_t*)(ss - 15));
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
break;
case 112:
memcpy_sse2_64(dd - 112, ss - 112);
case 48:
memcpy_sse2_32(dd - 48, ss - 48);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 113:
memcpy_sse2_64(dd - 113, ss - 113);
case 49:
memcpy_sse2_32(dd - 49, ss - 49);
memcpy_sse2_16(dd - 17, ss - 17);
dd[-1] = ss[-1];
break;
case 114:
memcpy_sse2_64(dd - 114, ss - 114);
case 50:
memcpy_sse2_32(dd - 50, ss - 50);
memcpy_sse2_16(dd - 18, ss - 18);
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 115:
memcpy_sse2_64(dd - 115, ss - 115);
case 51:
memcpy_sse2_32(dd - 51, ss - 51);
memcpy_sse2_16(dd - 19, ss - 19);
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
dd[-1] = ss[-1];
break;
case 116:
memcpy_sse2_64(dd - 116, ss - 116);
case 52:
memcpy_sse2_32(dd - 52, ss - 52);
memcpy_sse2_16(dd - 20, ss - 20);
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 117:
memcpy_sse2_64(dd - 117, ss - 117);
case 53:
memcpy_sse2_32(dd - 53, ss - 53);
memcpy_sse2_16(dd - 21, ss - 21);
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
dd[-1] = ss[-1];
break;
case 118:
memcpy_sse2_64(dd - 118, ss - 118);
case 54:
memcpy_sse2_32(dd - 54, ss - 54);
memcpy_sse2_16(dd - 22, ss - 22);
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
break;
case 119:
memcpy_sse2_64(dd - 119, ss - 119);
case 55:
memcpy_sse2_32(dd - 55, ss - 55);
memcpy_sse2_16(dd - 23, ss - 23);
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
break;
case 120:
memcpy_sse2_64(dd - 120, ss - 120);
case 56:
memcpy_sse2_32(dd - 56, ss - 56);
memcpy_sse2_16(dd - 24, ss - 24);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 121:
memcpy_sse2_64(dd - 121, ss - 121);
case 57:
memcpy_sse2_32(dd - 57, ss - 57);
memcpy_sse2_16(dd - 25, ss - 25);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 122:
memcpy_sse2_64(dd - 122, ss - 122);
case 58:
memcpy_sse2_32(dd - 58, ss - 58);
memcpy_sse2_16(dd - 26, ss - 26);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 123:
memcpy_sse2_64(dd - 123, ss - 123);
case 59:
memcpy_sse2_32(dd - 59, ss - 59);
memcpy_sse2_16(dd - 27, ss - 27);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 124:
memcpy_sse2_64(dd - 124, ss - 124);
case 60:
memcpy_sse2_32(dd - 60, ss - 60);
memcpy_sse2_16(dd - 28, ss - 28);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 125:
memcpy_sse2_64(dd - 125, ss - 125);
case 61:
memcpy_sse2_32(dd - 61, ss - 61);
memcpy_sse2_16(dd - 29, ss - 29);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 126:
memcpy_sse2_64(dd - 126, ss - 126);
case 62:
memcpy_sse2_32(dd - 62, ss - 62);
memcpy_sse2_16(dd - 30, ss - 30);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 127:
memcpy_sse2_64(dd - 127, ss - 127);
case 63:
memcpy_sse2_32(dd - 63, ss - 63);
memcpy_sse2_16(dd - 31, ss - 31);
memcpy_sse2_16(dd - 16, ss - 16);
break;
case 128:
memcpy_sse2_128(dd - 128, ss - 128);
break;
}
return dst;
}
//---------------------------------------------------------------------
// main routine
//---------------------------------------------------------------------
static void* memcpy_fast(void *destination, const void *source, size_t size)
{
unsigned char *dst = (unsigned char*)destination;
const unsigned char *src = (const unsigned char*)source;
static size_t cachesize = 0x200000; // L2-cache size
size_t padding;
// small memory copy
if (size <= 128) {
return memcpy_tiny(dst, src, size);
}
// align destination to 16 bytes boundary
padding = (16 - (((size_t)dst) & 15)) & 15;
if (padding > 0) {
__m128i head = _mm_loadu_si128((const __m128i*)src);
_mm_storeu_si128((__m128i*)dst, head);
dst += padding;
src += padding;
size -= padding;
}
// medium size copy
if (size <= cachesize) {
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
for (; size >= 128; size -= 128) {
c0 = _mm_loadu_si128(((const __m128i*)src) + 0);
c1 = _mm_loadu_si128(((const __m128i*)src) + 1);
c2 = _mm_loadu_si128(((const __m128i*)src) + 2);
c3 = _mm_loadu_si128(((const __m128i*)src) + 3);
c4 = _mm_loadu_si128(((const __m128i*)src) + 4);
c5 = _mm_loadu_si128(((const __m128i*)src) + 5);
c6 = _mm_loadu_si128(((const __m128i*)src) + 6);
c7 = _mm_loadu_si128(((const __m128i*)src) + 7);
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
src += 128;
_mm_store_si128((((__m128i*)dst) + 0), c0);
_mm_store_si128((((__m128i*)dst) + 1), c1);
_mm_store_si128((((__m128i*)dst) + 2), c2);
_mm_store_si128((((__m128i*)dst) + 3), c3);
_mm_store_si128((((__m128i*)dst) + 4), c4);
_mm_store_si128((((__m128i*)dst) + 5), c5);
_mm_store_si128((((__m128i*)dst) + 6), c6);
_mm_store_si128((((__m128i*)dst) + 7), c7);
dst += 128;
}
}
else { // big memory copy
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
_mm_prefetch((const char*)(src), _MM_HINT_NTA);
if ((((size_t)src) & 15) == 0) { // source aligned
for (; size >= 128; size -= 128) {
c0 = _mm_load_si128(((const __m128i*)src) + 0);
c1 = _mm_load_si128(((const __m128i*)src) + 1);
c2 = _mm_load_si128(((const __m128i*)src) + 2);
c3 = _mm_load_si128(((const __m128i*)src) + 3);
c4 = _mm_load_si128(((const __m128i*)src) + 4);
c5 = _mm_load_si128(((const __m128i*)src) + 5);
c6 = _mm_load_si128(((const __m128i*)src) + 6);
c7 = _mm_load_si128(((const __m128i*)src) + 7);
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
src += 128;
_mm_stream_si128((((__m128i*)dst) + 0), c0);
_mm_stream_si128((((__m128i*)dst) + 1), c1);
_mm_stream_si128((((__m128i*)dst) + 2), c2);
_mm_stream_si128((((__m128i*)dst) + 3), c3);
_mm_stream_si128((((__m128i*)dst) + 4), c4);
_mm_stream_si128((((__m128i*)dst) + 5), c5);
_mm_stream_si128((((__m128i*)dst) + 6), c6);
_mm_stream_si128((((__m128i*)dst) + 7), c7);
dst += 128;
}
}
else { // source unaligned
for (; size >= 128; size -= 128) {
c0 = _mm_loadu_si128(((const __m128i*)src) + 0);
c1 = _mm_loadu_si128(((const __m128i*)src) + 1);
c2 = _mm_loadu_si128(((const __m128i*)src) + 2);
c3 = _mm_loadu_si128(((const __m128i*)src) + 3);
c4 = _mm_loadu_si128(((const __m128i*)src) + 4);
c5 = _mm_loadu_si128(((const __m128i*)src) + 5);
c6 = _mm_loadu_si128(((const __m128i*)src) + 6);
c7 = _mm_loadu_si128(((const __m128i*)src) + 7);
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
src += 128;
_mm_stream_si128((((__m128i*)dst) + 0), c0);
_mm_stream_si128((((__m128i*)dst) + 1), c1);
_mm_stream_si128((((__m128i*)dst) + 2), c2);
_mm_stream_si128((((__m128i*)dst) + 3), c3);
_mm_stream_si128((((__m128i*)dst) + 4), c4);
_mm_stream_si128((((__m128i*)dst) + 5), c5);
_mm_stream_si128((((__m128i*)dst) + 6), c6);
_mm_stream_si128((((__m128i*)dst) + 7), c7);
dst += 128;
}
}
_mm_sfence();
}
memcpy_tiny(dst, src, size);
return destination;
}
#endif

View File

@ -0,0 +1,203 @@
Copyright 2016-2020 Yandex LLC
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2016-2020 Yandex LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2015 Linwei
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,186 @@
/** Allows to build programs with libc 2.27 and run on systems with at least libc 2.4,
* such as Ubuntu Hardy or CentOS 5.
*
* Also look at http://www.lightofdawn.org/wiki/wiki.cgi/NewAppsOnOldGlibc
*/
#if defined (__cplusplus)
extern "C" {
#endif
#include <pthread.h>
size_t __pthread_get_minstack(const pthread_attr_t * attr)
{
return 1048576; /// This is a guess. Don't sure it is correct.
}
#include <signal.h>
#include <unistd.h>
#include <string.h>
#include <sys/syscall.h>
long int syscall(long int __sysno, ...) __THROW;
int __gai_sigqueue(int sig, const union sigval val, pid_t caller_pid)
{
siginfo_t info;
memset(&info, 0, sizeof(siginfo_t));
info.si_signo = sig;
info.si_code = SI_ASYNCNL;
info.si_pid = caller_pid;
info.si_uid = getuid();
info.si_value = val;
return syscall(__NR_rt_sigqueueinfo, info.si_pid, sig, &info);
}
#include <sys/select.h>
#include <stdlib.h>
#include <features.h>
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16)
long int __fdelt_chk(long int d)
{
if (d < 0)
abort();
#else
unsigned long int __fdelt_chk(unsigned long int d)
{
#endif
if (d >= FD_SETSIZE)
abort();
return d / __NFDBITS;
}
#include <sys/poll.h>
#include <stddef.h>
int __poll_chk(struct pollfd * fds, nfds_t nfds, int timeout, size_t fdslen)
{
if (fdslen / sizeof(*fds) < nfds)
abort();
return poll(fds, nfds, timeout);
}
#include <setjmp.h>
void musl_glibc_longjmp(jmp_buf env, int val);
/// NOTE This disables some of FORTIFY_SOURCE functionality.
void __longjmp_chk(jmp_buf env, int val)
{
musl_glibc_longjmp(env, val);
}
#include <stdarg.h>
int vasprintf(char **s, const char *fmt, va_list ap);
int __vasprintf_chk(char **s, int unused, const char *fmt, va_list ap)
{
return vasprintf(s, fmt, ap);
}
int __asprintf_chk(char **result_ptr, int unused, const char *format, ...)
{
int ret;
va_list ap;
va_start (ap, format);
ret = vasprintf(result_ptr, format, ap);
va_end (ap);
return ret;
}
int vdprintf(int fd, const char *format, va_list ap);
int __dprintf_chk (int d, int unused, const char *format, ...)
{
int ret;
va_list ap;
va_start (ap, format);
ret = vdprintf(d, format, ap);
va_end (ap);
return ret;
}
size_t fread(void *ptr, size_t size, size_t nmemb, void *stream);
size_t __fread_chk(void *ptr, size_t unused, size_t size, size_t nmemb, void *stream)
{
return fread(ptr, size, nmemb, stream);
}
int vsscanf(const char *str, const char *format, va_list ap);
int __isoc99_vsscanf(const char *str, const char *format, va_list ap)
{
return vsscanf(str, format, ap);
}
int sscanf(const char *restrict s, const char *restrict fmt, ...)
{
int ret;
va_list ap;
va_start(ap, fmt);
ret = vsscanf(s, fmt, ap);
va_end(ap);
return ret;
}
int __isoc99_sscanf(const char *str, const char *format, ...) __attribute__((weak, nonnull, nothrow, alias("sscanf")));
int open(const char *path, int oflag);
int __open_2(const char *path, int oflag)
{
return open(path, oflag);
}
/// No-ops.
int pthread_setname_np(pthread_t thread, const char *name) { return 0; }
int pthread_getname_np(pthread_t thread, char *name, size_t len) { name[0] = '\0'; return 0; };
#define SHMDIR "/dev/shm/"
const char * __shm_directory(size_t * len)
{
*len = sizeof(SHMDIR) - 1;
return SHMDIR;
}
/// https://boringssl.googlesource.com/boringssl/+/ad1907fe73334d6c696c8539646c21b11178f20f%5E!/#F0
/* Copyright (c) 2015, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
void __attribute__((__weak__)) explicit_bzero(void * buf, size_t len)
{
memset(buf, 0, len);
__asm__ __volatile__("" :: "r"(buf) : "memory");
}
void __explicit_bzero_chk(void * buf, size_t len, size_t unused)
{
return explicit_bzero(buf, len);
}
#if defined (__cplusplus)
}
#endif

View File

@ -0,0 +1,6 @@
#include "FastMemcpy.h"
void * memcpy(void * __restrict destination, const void * __restrict source, size_t size)
{
return memcpy_fast(destination, source, size);
}

View File

@ -0,0 +1,163 @@
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2014 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
Alex Dowad
Alexander Monakov
Anthony G. Basile
Arvid Picciani
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Denys Vlasenko
Emil Renner Berthing
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Josiah Worcester
Justin Cormack
Khem Raj
Kylie McClain
Luca Barbato
Luka Perkov
M Farkas-Dyck (Strake)
Mahesh Bodapati
Michael Forney
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Petr Hosek
Pierre Carrier
Rich Felker
Richard Pennington
Shiz
sin
Solar Designer
Stefan Kristiansson
Szabolcs Nagy
Timo Teräs
Trutz Behn
Valentin Ochs
William Haddon
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Valentin Ochs and is licensed under an MIT-style license.
The BSD PRNG implementation (src/prng/random.c) and XSI search API
(src/search/*.c) functions are Copyright © 2011 Szabolcs Nagy and
licensed under following terms: "Permission to use, copy, modify,
and/or distribute this code for any purpose with or without fee is
hereby granted. There is no warranty."
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.

View File

@ -0,0 +1,8 @@
Tiny pieces extracted from MUSL library.
git://git.musl-libc.org/musl
c10bc61508dc52b8315084e628f36a6c3c2dabb1
NOTE: Files was edited.
NOTE: Math related files are pulled from commit 6ad514e4e278f0c3b18eb2db1d45638c9af1c07f.

View File

@ -0,0 +1,6 @@
#include "libm.h"
double __math_divzero(uint32_t sign)
{
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
float __math_divzerof(uint32_t sign)
{
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
double __math_invalid(double x)
{
return (x - x) / (x - x);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
float __math_invalidf(float x)
{
return (x - x) / (x - x);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
double __math_oflow(uint32_t sign)
{
return __math_xflow(sign, 0x1p769);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
float __math_oflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p97f);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
double __math_uflow(uint32_t sign)
{
return __math_xflow(sign, 0x1p-767);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
float __math_uflowf(uint32_t sign)
{
return __math_xflowf(sign, 0x1p-95f);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
double __math_xflow(uint32_t sign, double y)
{
return eval_as_double(fp_barrier(sign ? -y : y) * y);
}

View File

@ -0,0 +1,6 @@
#include "libm.h"
float __math_xflowf(uint32_t sign, float y)
{
return eval_as_float(fp_barrierf(sign ? -y : y) * y);
}

View File

@ -0,0 +1,82 @@
#define a_ll a_ll
static inline int a_ll(volatile int *p)
{
int v;
__asm__ __volatile__ ("ldaxr %w0,%1" : "=r"(v) : "Q"(*p));
return v;
}
#define a_sc a_sc
static inline int a_sc(volatile int *p, int v)
{
int r;
__asm__ __volatile__ ("stlxr %w0,%w2,%1" : "=&r"(r), "=Q"(*p) : "r"(v) : "memory");
return !r;
}
#define a_barrier a_barrier
static inline void a_barrier()
{
__asm__ __volatile__ ("dmb ish" : : : "memory");
}
#define a_cas a_cas
static inline int a_cas(volatile int *p, int t, int s)
{
int old;
do {
old = a_ll(p);
if (old != t) {
a_barrier();
break;
}
} while (!a_sc(p, s));
return old;
}
#define a_ll_p a_ll_p
static inline void *a_ll_p(volatile void *p)
{
void *v;
__asm__ __volatile__ ("ldaxr %0, %1" : "=r"(v) : "Q"(*(void *volatile *)p));
return v;
}
#define a_sc_p a_sc_p
static inline int a_sc_p(volatile int *p, void *v)
{
int r;
__asm__ __volatile__ ("stlxr %w0,%2,%1" : "=&r"(r), "=Q"(*(void *volatile *)p) : "r"(v) : "memory");
return !r;
}
#define a_cas_p a_cas_p
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
void *old;
do {
old = a_ll_p(p);
if (old != t) {
a_barrier();
break;
}
} while (!a_sc_p(p, s));
return old;
}
#define a_ctz_64 a_ctz_64
static inline int a_ctz_64(uint64_t x)
{
__asm__(
" rbit %0, %1\n"
" clz %0, %0\n"
: "=r"(x) : "r"(x));
return x;
}
#define a_clz_64 a_clz_64
static inline int a_clz_64(uint64_t x)
{
__asm__("clz %0, %1" : "=r"(x) : "r"(x));
return x;
}

View File

@ -0,0 +1,21 @@
.global musl_glibc_longjmp
.type musl_glibc_longjmp,@function
musl_glibc_longjmp:
// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
ldp x19, x20, [x0,#0]
ldp x21, x22, [x0,#16]
ldp x23, x24, [x0,#32]
ldp x25, x26, [x0,#48]
ldp x27, x28, [x0,#64]
ldp x29, x30, [x0,#80]
ldr x2, [x0,#104]
mov sp, x2
ldp d8 , d9, [x0,#112]
ldp d10, d11, [x0,#128]
ldp d12, d13, [x0,#144]
ldp d14, d15, [x0,#160]
mov x0, x1
cbnz x1, 1f
mov x0, #1
1: br x30

View File

@ -0,0 +1,14 @@
.global __syscall
.hidden __syscall
.type __syscall,%function
__syscall:
uxtw x8,w0
mov x0,x1
mov x1,x2
mov x2,x3
mov x3,x4
mov x4,x5
mov x5,x6
mov x6,x7
svc 0
ret

View File

@ -0,0 +1,3 @@
#define VDSO_USEFUL
#define VDSO_CGT_SYM "__kernel_clock_gettime"
#define VDSO_CGT_VER "LINUX_2.6.39"

View File

@ -0,0 +1,318 @@
#ifndef _ATOMIC_H
#define _ATOMIC_H
#include <stdint.h>
#include "atomic_arch.h"
#ifdef a_ll
#ifndef a_pre_llsc
#define a_pre_llsc()
#endif
#ifndef a_post_llsc
#define a_post_llsc()
#endif
#ifndef a_cas
#define a_cas a_cas
static inline int a_cas(volatile int *p, int t, int s)
{
int old;
a_pre_llsc();
do old = a_ll(p);
while (old==t && !a_sc(p, s));
a_post_llsc();
return old;
}
#endif
#ifndef a_swap
#define a_swap a_swap
static inline int a_swap(volatile int *p, int v)
{
int old;
a_pre_llsc();
do old = a_ll(p);
while (!a_sc(p, v));
a_post_llsc();
return old;
}
#endif
#ifndef a_fetch_add
#define a_fetch_add a_fetch_add
static inline int a_fetch_add(volatile int *p, int v)
{
int old;
a_pre_llsc();
do old = a_ll(p);
while (!a_sc(p, (unsigned)old + v));
a_post_llsc();
return old;
}
#endif
#ifndef a_fetch_and
#define a_fetch_and a_fetch_and
static inline int a_fetch_and(volatile int *p, int v)
{
int old;
a_pre_llsc();
do old = a_ll(p);
while (!a_sc(p, old & v));
a_post_llsc();
return old;
}
#endif
#ifndef a_fetch_or
#define a_fetch_or a_fetch_or
static inline int a_fetch_or(volatile int *p, int v)
{
int old;
a_pre_llsc();
do old = a_ll(p);
while (!a_sc(p, old | v));
a_post_llsc();
return old;
}
#endif
#endif
#ifdef a_ll_p
#ifndef a_cas_p
#define a_cas_p a_cas_p
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
void *old;
a_pre_llsc();
do old = a_ll_p(p);
while (old==t && !a_sc_p(p, s));
a_post_llsc();
return old;
}
#endif
#endif
#ifndef a_cas
#error missing definition of a_cas
#endif
#ifndef a_swap
#define a_swap a_swap
static inline int a_swap(volatile int *p, int v)
{
int old;
do old = *p;
while (a_cas(p, old, v) != old);
return old;
}
#endif
#ifndef a_fetch_add
#define a_fetch_add a_fetch_add
static inline int a_fetch_add(volatile int *p, int v)
{
int old;
do old = *p;
while (a_cas(p, old, (unsigned)old+v) != old);
return old;
}
#endif
#ifndef a_fetch_and
#define a_fetch_and a_fetch_and
static inline int a_fetch_and(volatile int *p, int v)
{
int old;
do old = *p;
while (a_cas(p, old, old&v) != old);
return old;
}
#endif
#ifndef a_fetch_or
#define a_fetch_or a_fetch_or
static inline int a_fetch_or(volatile int *p, int v)
{
int old;
do old = *p;
while (a_cas(p, old, old|v) != old);
return old;
}
#endif
#ifndef a_and
#define a_and a_and
static inline void a_and(volatile int *p, int v)
{
a_fetch_and(p, v);
}
#endif
#ifndef a_or
#define a_or a_or
static inline void a_or(volatile int *p, int v)
{
a_fetch_or(p, v);
}
#endif
#ifndef a_inc
#define a_inc a_inc
static inline void a_inc(volatile int *p)
{
a_fetch_add(p, 1);
}
#endif
#ifndef a_dec
#define a_dec a_dec
static inline void a_dec(volatile int *p)
{
a_fetch_add(p, -1);
}
#endif
#ifndef a_store
#define a_store a_store
static inline void a_store(volatile int *p, int v)
{
#ifdef a_barrier
a_barrier();
*p = v;
a_barrier();
#else
a_swap(p, v);
#endif
}
#endif
#ifndef a_barrier
#define a_barrier a_barrier
static void a_barrier()
{
volatile int tmp = 0;
a_cas(&tmp, 0, 0);
}
#endif
#ifndef a_spin
#define a_spin a_barrier
#endif
#ifndef a_and_64
#define a_and_64 a_and_64
static inline void a_and_64(volatile uint64_t *p, uint64_t v)
{
union { uint64_t v; uint32_t r[2]; } u = { v };
if (u.r[0]+1) a_and((int *)p, u.r[0]);
if (u.r[1]+1) a_and((int *)p+1, u.r[1]);
}
#endif
#ifndef a_or_64
#define a_or_64 a_or_64
static inline void a_or_64(volatile uint64_t *p, uint64_t v)
{
union { uint64_t v; uint32_t r[2]; } u = { v };
if (u.r[0]) a_or((int *)p, u.r[0]);
if (u.r[1]) a_or((int *)p+1, u.r[1]);
}
#endif
#ifndef a_cas_p
typedef char a_cas_p_undefined_but_pointer_not_32bit[-sizeof(char) == 0xffffffff ? 1 : -1];
#define a_cas_p a_cas_p
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
return (void *)a_cas((volatile int *)p, (int)t, (int)s);
}
#endif
#ifndef a_or_l
#define a_or_l a_or_l
static inline void a_or_l(volatile void *p, long v)
{
if (sizeof(long) == sizeof(int)) a_or(p, v);
else a_or_64(p, v);
}
#endif
#ifndef a_crash
#define a_crash a_crash
static inline void a_crash()
{
*(volatile char *)0=0;
}
#endif
#ifndef a_ctz_32
#define a_ctz_32 a_ctz_32
static inline int a_ctz_32(uint32_t x)
{
#ifdef a_clz_32
return 31-a_clz_32(x&-x);
#else
static const char debruijn32[32] = {
0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
};
return debruijn32[(x&-x)*0x076be629 >> 27];
#endif
}
#endif
#ifndef a_ctz_64
#define a_ctz_64 a_ctz_64
static inline int a_ctz_64(uint64_t x)
{
static const char debruijn64[64] = {
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
};
if (sizeof(long) < 8) {
uint32_t y = x;
if (!y) {
y = x>>32;
return 32 + a_ctz_32(y);
}
return a_ctz_32(y);
}
return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
}
#endif
static inline int a_ctz_l(unsigned long x)
{
return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
}
#ifndef a_clz_64
#define a_clz_64 a_clz_64
static inline int a_clz_64(uint64_t x)
{
#ifdef a_clz_32
if (x>>32)
return a_clz_32(x>>32);
return a_clz_32(x) + 32;
#else
uint32_t y;
int r;
if (x>>32) y=x>>32, r=0; else y=x, r=32;
if (y>>16) y>>=16; else r |= 16;
if (y>>8) y>>=8; else r |= 8;
if (y>>4) y>>=4; else r |= 4;
if (y>>2) y>>=2; else r |= 2;
return r | !(y>>1);
#endif
}
#endif
#endif

View File

@ -0,0 +1,22 @@
#include <time.h>
#include <unistd.h>
#include "syscall.h"
int clock_getres(clockid_t clk, struct timespec *ts)
{
#ifdef SYS_clock_getres_time64
/* On a 32-bit arch, use the old syscall if it exists. */
if (SYS_clock_getres != SYS_clock_getres_time64) {
long ts32[2];
int r = __syscall(SYS_clock_getres, clk, ts32);
if (!r && ts) {
ts->tv_sec = ts32[0];
ts->tv_nsec = ts32[1];
}
return __syscall_ret(r);
}
#endif
/* If reaching this point, it's a 64-bit arch or time64-only
* 32-bit arch and we can get result directly into timespec. */
return syscall(SYS_clock_getres, clk, ts);
}

View File

@ -0,0 +1,105 @@
#include <errno.h>
#include <stdint.h>
#include <time.h>
#include "atomic.h"
#include "syscall.h"
#ifdef VDSO_CGT_SYM
static void *volatile vdso_func;
#ifdef VDSO_CGT32_SYM
static void *volatile vdso_func_32;
static int cgt_time32_wrap(clockid_t clk, struct timespec *ts)
{
long ts32[2];
int (*f)(clockid_t, long[2]) =
(int (*)(clockid_t, long[2]))vdso_func_32;
int r = f(clk, ts32);
if (!r) {
/* Fallback to syscalls if time32 overflowed. Maybe
* we lucked out and somehow migrated to a kernel with
* time64 syscalls available. */
if (ts32[0] < 0) {
a_cas_p(&vdso_func, (void *)cgt_time32_wrap, 0);
return -ENOSYS;
}
ts->tv_sec = ts32[0];
ts->tv_nsec = ts32[1];
}
return r;
}
#endif
static int cgt_init(clockid_t clk, struct timespec *ts)
{
void *p = __vdsosym(VDSO_CGT_VER, VDSO_CGT_SYM);
#ifdef VDSO_CGT32_SYM
if (!p) {
void *q = __vdsosym(VDSO_CGT32_VER, VDSO_CGT32_SYM);
if (q) {
a_cas_p(&vdso_func_32, 0, q);
p = cgt_time32_wrap;
}
}
#endif
int (*f)(clockid_t, struct timespec *) =
(int (*)(clockid_t, struct timespec *))p;
a_cas_p(&vdso_func, (void *)cgt_init, p);
return f ? f(clk, ts) : -ENOSYS;
}
static void *volatile vdso_func = (void *)cgt_init;
#endif
int clock_gettime(clockid_t clk, struct timespec *ts)
{
int r;
#ifdef VDSO_CGT_SYM
int (*f)(clockid_t, struct timespec *) =
(int (*)(clockid_t, struct timespec *))vdso_func;
if (f) {
r = f(clk, ts);
if (!r) return r;
if (r == -EINVAL) return __syscall_ret(r);
/* Fall through on errors other than EINVAL. Some buggy
* vdso implementations return ENOSYS for clocks they
* can't handle, rather than making the syscall. This
* also handles the case where cgt_init fails to find
* a vdso function to use. */
}
#endif
#ifdef SYS_clock_gettime64
r = -ENOSYS;
if (sizeof(time_t) > 4)
r = __syscall(SYS_clock_gettime64, clk, ts);
if (SYS_clock_gettime == SYS_clock_gettime64 || r!=-ENOSYS)
return __syscall_ret(r);
long ts32[2];
r = __syscall(SYS_clock_gettime, clk, ts32);
if (r==-ENOSYS && clk==CLOCK_REALTIME) {
r = __syscall(SYS_gettimeofday, ts32, 0);
ts32[1] *= 1000;
}
if (!r) {
ts->tv_sec = ts32[0];
ts->tv_nsec = ts32[1];
return r;
}
return __syscall_ret(r);
#else
r = __syscall(SYS_clock_gettime, clk, ts);
if (r == -ENOSYS) {
if (clk == CLOCK_REALTIME) {
__syscall(SYS_gettimeofday, ts, 0);
ts->tv_nsec = (int)ts->tv_nsec * 1000;
return 0;
}
r = -EINVAL;
}
return __syscall_ret(r);
#endif
}

View File

@ -0,0 +1,24 @@
#include <errno.h>
#include <pthread.h>
#include <time.h>
#include "syscall.h"
int clock_nanosleep(clockid_t clk, int flags, const struct timespec * req, struct timespec * rem)
{
if (clk == CLOCK_THREAD_CPUTIME_ID)
return EINVAL;
int old_cancel_type;
int status;
/// We cannot port __syscall_cp because musl has very limited cancellation point implementation.
/// For example, c++ destructors won't get called and exception unwinding isn't implemented.
/// Instead, we use normal __syscall here and turn on the asynchrous cancel mode to allow
/// cancel. This works because nanosleep doesn't contain any resource allocations or
/// deallocations.
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_cancel_type);
if (clk == CLOCK_REALTIME && !flags)
status = -__syscall(SYS_nanosleep, req, rem);
else
status = -__syscall(SYS_clock_nanosleep, clk, flags, req, rem);
pthread_setcanceltype(old_cancel_type, NULL);
return status;
}

View File

@ -0,0 +1,134 @@
/*
* Double-precision e^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
double exp(double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (asuint64(x) >> 63)
return __math_uflow(0);
else
return __math_oflow(0);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}

View File

@ -0,0 +1,121 @@
/*
* Double-precision 2^x function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T __exp_data.tab
#define C1 __exp_data.exp2_poly[0]
#define C2 __exp_data.exp2_poly[1]
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by 1. */
sbits -= 1ull << 52;
scale = asdouble(sbits);
y = 2 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
scale = asdouble(sbits);
y = scale + scale * tmp;
if (y < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
lo = scale - y + scale * tmp;
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double(hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
double exp2(double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
double_t kd, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
if (abstop >= top12(1024.0)) {
if (asuint64(x) == asuint64(-INFINITY))
return 0.0;
if (abstop >= top12(INFINITY))
return 1.0 + x;
if (!(asuint64(x) >> 63))
return __math_oflow(0);
else if (asuint64(x) >= asuint64(-1075.0))
return __math_uflow(0);
}
if (2 * asuint64(x) > 2 * asuint64(928.0))
/* Large x is special cased below. */
abstop = 0;
}
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double(x + Shift);
ki = asuint64(kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}

View File

@ -0,0 +1,69 @@
/*
* Single-precision 2^x function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
Wrong count: 168353 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
static inline uint32_t top12(float x)
{
return asuint(x) >> 20;
}
float exp2f(float x)
{
uint32_t abstop;
uint64_t ki, t;
double_t kd, xd, z, r, r2, y, s;
xd = (double_t)x;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop >= top12(128.0f))) {
/* |x| >= 128 or x is nan. */
if (asuint(x) == asuint(-INFINITY))
return 0.0f;
if (abstop >= top12(INFINITY))
return x + x;
if (x > 0.0f)
return __math_oflowf(0);
if (x <= -150.0f)
return __math_uflowf(0);
}
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N for int k. */
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}

View File

@ -0,0 +1,35 @@
/*
* Shared data between expf, exp2f and powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp2f_data.h"
#define N (1 << EXP2F_TABLE_BITS)
const struct exp2f_data __exp2f_data = {
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab = {
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
},
.shift_scaled = 0x1.8p+52 / N,
.poly = {
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
},
.shift = 0x1.8p+52,
.invln2_scaled = 0x1.71547652b82fep+0 * N,
.poly_scaled = {
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
},
};

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP2F_DATA_H
#define _EXP2F_DATA_H
#include "musl_features.h"
#include <stdint.h>
/* Shared between expf, exp2f and powf. */
#define EXP2F_TABLE_BITS 5
#define EXP2F_POLY_ORDER 3
extern hidden const struct exp2f_data {
uint64_t tab[1 << EXP2F_TABLE_BITS];
double shift_scaled;
double poly[EXP2F_POLY_ORDER];
double shift;
double invln2_scaled;
double poly_scaled[EXP2F_POLY_ORDER];
} __exp2f_data;
#endif

View File

@ -0,0 +1,182 @@
/*
* Shared data between exp, exp2 and pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "exp_data.h"
#define N (1 << EXP_TABLE_BITS)
const struct exp_data __exp_data = {
// N/ln2
.invln2N = 0x1.71547652b82fep0 * N,
// -ln2/N
.negln2hiN = -0x1.62e42fefa0000p-8,
.negln2loN = -0x1.cf79abc9e3b3ap-47,
// Used for rounding when !TOINT_INTRINSICS
#if EXP_USE_TOINT_NARROW
.shift = 0x1800000000.8p0,
#else
.shift = 0x1.8p52,
#endif
// exp polynomial coefficients.
.poly = {
// abs error: 1.555*2^-66
// ulp error: 0.509 (0.511 without fma)
// if |x| < ln2/256+eps
// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
// abs error if |x| < ln2/128: 1.7145*2^-56
0x1.ffffffffffdbdp-2,
0x1.555555555543cp-3,
0x1.55555cf172b91p-5,
0x1.1111167a4d017p-7,
},
.exp2_shift = 0x1.8p52 / N,
// exp2 polynomial coefficients.
.exp2_poly = {
// abs error: 1.2195*2^-65
// ulp error: 0.507 (0.511 without fma)
// if |x| < 1/256
// abs error if |x| < 1/128: 1.9941*2^-56
0x1.62e42fefa39efp-1,
0x1.ebfbdff82c424p-3,
0x1.c6b08d70cf4b5p-5,
0x1.3b2abd24650ccp-7,
0x1.5d7e09b4e3a84p-10,
},
// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
// tab[2*k] = asuint64(T[k])
// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
.tab = {
0x0, 0x3ff0000000000000,
0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
0xbc905e7a108766d1, 0x3fefe315e86e7f85,
0x3c8cd2523567f613, 0x3fefd9b0d3158574,
0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
0x3c60f74e61e6c861, 0x3fefc74518759bc8,
0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
0xbc6a033489906e0b, 0x3fef9b66affed31b,
0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
0xbc91c923b9d5f416, 0x3fef829aaea92de0,
0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
0xbc801b15eaa59348, 0x3fef72b83c7d517b,
0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
0xbc96d99c7611eb26, 0x3fef5be084045cd4,
0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
0x3c968efde3a8a894, 0x3fef387a6e756238,
0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
0x3c834d754db0abb6, 0x3fef06fe0a31b715,
0x3c864201e2ac744c, 0x3fef0170fc4cd831,
0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
0xbc9907f81b512d8e, 0x3feeecae6d05d866,
0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
0x3c859f48a72a4c6d, 0x3feedea64c123422,
0xbc9312607a28698a, 0x3feeda4504ac801c,
0xbc58a78f4817895b, 0x3feed60a21f72e2a,
0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
0x3c4363ed60c2ac11, 0x3feece086061892d,
0x3c9666093b0664ef, 0x3feeca41ed1d0057,
0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
0x3c93350518fdd78e, 0x3feeaf4736b527da,
0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
0x3c9063e1e21c5409, 0x3feeab07dd485429,
0x3c34c7855019c6ea, 0x3feea9268a5946b7,
0x3c9432e62b64c035, 0x3feea76f15ad2148,
0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
0xbc845378892be9ae, 0x3feea34634ccc320,
0xbc93cedd78565858, 0x3feea23882552225,
0x3c5710aa807e1964, 0x3feea155d44ca973,
0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
0xbc6a12ad8734b982, 0x3feea012750bdabf,
0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
0xbc80dc3d54e08851, 0x3fee9f7df9519484,
0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
0xbc8619321e55e68a, 0x3fee9feb564267c9,
0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
0xbc7b32dcb94da51d, 0x3feea11473eb0187,
0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
0xbc9369b6f13b3734, 0x3feea589994cce13,
0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
0xbc94d450d872576e, 0x3feea8d99b4492ed,
0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
0x3c7bf68359f35f44, 0x3feeb1ae99157736,
0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
0xbc92434322f4f9aa, 0x3feebd829fde4e50,
0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
0x3c71affc2b91ce27, 0x3feec49182a3f090,
0x3c6dd235e10a73bb, 0x3feec86319e32323,
0xbc87c50422622263, 0x3feecc667b5de565,
0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
0x3c90cc319cee31d2, 0x3feed99e1330b358,
0x3c8469846e735ab3, 0x3feede6b5579fdbf,
0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
0xbc90a40e3da6f640, 0x3feef9728de5593a,
0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
0xbc91eee26b588a35, 0x3fef05b030a1064a,
0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
0x3c736eae30af0cb3, 0x3fef199bdd85529c,
0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
0x3c676b2c6c921968, 0x3fef3720dcef9069,
0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
0xbc900dae3875a949, 0x3fef4f87080d89f2,
0x3c74a385a63d07a7, 0x3fef5818dcfba487,
0xbc82919e2040220f, 0x3fef60e316c98398,
0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
0x3c843a59ac016b4b, 0x3fef7321f301b460,
0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
0xbc892ab93b470dc9, 0x3fef864614f5a129,
0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
},
};

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _EXP_DATA_H
#define _EXP_DATA_H
#include "musl_features.h"
#include <stdint.h>
#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
extern hidden const struct exp_data {
double invln2N;
double shift;
double negln2hiN;
double negln2loN;
double poly[4]; /* Last four coefficients. */
double exp2_shift;
double exp2_poly[EXP2_POLY_ORDER];
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
} __exp_data;
#endif

View File

@ -0,0 +1,10 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/syscall.h>
extern long int syscall (long int __sysno, ...) __THROW;
int fallocate(int fd, int mode, off_t base, off_t len)
{
return syscall(SYS_fallocate, fd, mode, base, len);
}

View File

@ -0,0 +1,61 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdarg.h>
#include <errno.h>
#include <unistd.h>
#include <syscall.h>
#include "syscall.h"
#include <features.h>
#if defined(__GLIBC__) && __GLIBC__ >= 2
# define GLIBC_MINOR __GLIBC_MINOR__
#elif defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ >= 2
# define GLIBC_MINOR __GNU_LIBRARY_MINOR__
#endif
#if defined(GLIBC_MINOR) && GLIBC_MINOR >= 28
int fcntl64(int fd, int cmd, ...)
{
unsigned long arg;
va_list ap;
va_start(ap, cmd);
arg = va_arg(ap, unsigned long);
va_end(ap);
if (cmd == F_SETFL) arg |= O_LARGEFILE;
if (cmd == F_SETLKW) return syscall(SYS_fcntl, fd, cmd, (void *)arg);
if (cmd == F_GETOWN) {
struct f_owner_ex ex;
int ret = __syscall(SYS_fcntl, fd, F_GETOWN_EX, &ex);
if (ret == -EINVAL) return __syscall(SYS_fcntl, fd, cmd, (void *)arg);
if (ret) return __syscall_ret(ret);
return ex.type == F_OWNER_PGRP ? -ex.pid : ex.pid;
}
if (cmd == F_DUPFD_CLOEXEC) {
int ret = __syscall(SYS_fcntl, fd, F_DUPFD_CLOEXEC, arg);
if (ret != -EINVAL) {
if (ret >= 0)
__syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
return __syscall_ret(ret);
}
ret = __syscall(SYS_fcntl, fd, F_DUPFD_CLOEXEC, 0);
if (ret != -EINVAL) {
if (ret >= 0) __syscall(SYS_close, ret);
return __syscall_ret(-EINVAL);
}
ret = __syscall(SYS_fcntl, fd, F_DUPFD, arg);
if (ret >= 0) __syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
return __syscall_ret(ret);
}
switch (cmd) {
case F_SETLK:
case F_GETLK:
case F_GETOWN_EX:
case F_SETOWN_EX:
return syscall(SYS_fcntl, fd, cmd, (void *)arg);
default:
return syscall(SYS_fcntl, fd, cmd, arg);
}
}
#endif

View File

@ -0,0 +1,38 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <errno.h>
#include "syscall.h"
#include <syscall.h>
int futimens(int fd, const struct timespec times[2])
{
int r = __syscall(SYS_utimensat, fd, 0, times, 0);
#ifdef SYS_futimesat
if (r != -ENOSYS) return __syscall_ret(r);
struct timeval *tv = 0, tmp[2];
if (times) {
int i;
tv = tmp;
for (i=0; i<2; i++) {
if (times[i].tv_nsec >= 1000000000ULL) {
if (times[i].tv_nsec == UTIME_NOW &&
times[1-i].tv_nsec == UTIME_NOW) {
tv = 0;
break;
}
if (times[i].tv_nsec == UTIME_OMIT)
return __syscall_ret(-ENOSYS);
return __syscall_ret(-EINVAL);
}
tmp[i].tv_sec = times[i].tv_sec;
tmp[i].tv_usec = times[i].tv_nsec / 1000;
}
}
r = __syscall(SYS_futimesat, fd, 0, tv);
if (r != -ENOSYS || fd != AT_FDCWD) return __syscall_ret(r);
r = __syscall(SYS_utimes, 0, tv);
#endif
return __syscall_ret(r);
}

View File

@ -0,0 +1,33 @@
#define _DEFAULT_SOURCE
#include <unistd.h>
#include <sys/random.h>
#include <pthread.h>
#include <errno.h>
int getentropy(void *buffer, size_t len)
{
int cs, ret = 0;
char *pos = buffer;
if (len > 256) {
errno = EIO;
return -1;
}
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
while (len) {
ret = getrandom(pos, len, 0);
if (ret < 0) {
if (errno == EINTR) continue;
else break;
}
pos += ret;
len -= ret;
ret = 0;
}
pthread_setcancelstate(cs, 0);
return ret;
}

View File

@ -0,0 +1,23 @@
/** We have to replace glibc getrandom only when glibc version is higher than 2.25.
* In previous versions of glibc this function doesn't exist
* and old kernels may be missing SYS_getrandom syscall.
*/
#include <features.h>
#if defined(__GLIBC__) && __GLIBC__ >= 2
# define GLIBC_MINOR __GLIBC_MINOR__
#elif defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ >= 2
# define GLIBC_MINOR __GNU_LIBRARY_MINOR__
#endif
#if defined(GLIBC_MINOR) && GLIBC_MINOR >= 25
#include <unistd.h>
#include <syscall.h>
#include "syscall.h"
ssize_t getrandom(void *buf, size_t buflen, unsigned flags)
{
/// There was cancellable syscall (syscall_cp), but I don't care too.
return syscall(SYS_getrandom, buf, buflen, flags);
}
#endif

View File

@ -0,0 +1,240 @@
#include <glob.h>
#include <fnmatch.h>
#include <sys/stat.h>
#include <dirent.h>
#include <limits.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <stddef.h>
struct match
{
struct match *next;
char name[1];
};
static int is_literal(const char *p, int useesc)
{
int bracket = 0;
for (; *p; p++) {
switch (*p) {
case '\\':
if (!useesc) break;
case '?':
case '*':
return 0;
case '[':
bracket = 1;
break;
case ']':
if (bracket) return 0;
break;
}
}
return 1;
}
static int append(struct match **tail, const char *name, size_t len, int mark)
{
struct match *new = malloc(sizeof(struct match) + len + 1);
if (!new) return -1;
(*tail)->next = new;
new->next = NULL;
strcpy(new->name, name);
if (mark) strcat(new->name, "/");
*tail = new;
return 0;
}
static int match_in_dir(const char *d, const char *p, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
{
DIR *dir;
struct dirent *de;
char pat[strlen(p)+1];
char *p2;
size_t l = strlen(d);
int literal;
int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
| ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
int error;
if ((p2 = strchr(p, '/'))) {
strcpy(pat, p);
pat[p2-p] = 0;
for (; *p2 == '/'; p2++);
p = pat;
}
literal = is_literal(p, !(flags & GLOB_NOESCAPE));
if (*d == '/' && !*(d+1)) l = 0;
/* rely on opendir failing for nondirectory objects */
dir = opendir(*d ? d : ".");
error = errno;
if (!dir) {
/* this is not an error -- we let opendir call stat for us */
if (error == ENOTDIR) return 0;
if (error == EACCES && !*p) {
struct stat st;
if (!stat(d, &st) && S_ISDIR(st.st_mode)) {
if (append(tail, d, l, l))
return GLOB_NOSPACE;
return 0;
}
}
if (errfunc(d, error) || (flags & GLOB_ERR))
return GLOB_ABORTED;
return 0;
}
if (!*p) {
error = append(tail, d, l, l) ? GLOB_NOSPACE : 0;
closedir(dir);
return error;
}
while ((de = readdir(dir))) {
char namebuf[l+de->d_reclen+2], *name = namebuf;
if (!literal && fnmatch(p, de->d_name, fnm_flags))
continue;
if (literal && strcmp(p, de->d_name))
continue;
if (p2 && de->d_type && !S_ISDIR(de->d_type<<12) && !S_ISLNK(de->d_type<<12))
continue;
/* With GLOB_PERIOD, don't allow matching . or .. unless
* fnmatch would match them with FNM_PERIOD rules in effect. */
if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
&& (!de->d_name[1] || (de->d_name[1]=='.' && !de->d_name[2]))
&& fnmatch(p, de->d_name, fnm_flags | FNM_PERIOD))
continue;
if (*d) {
memcpy(name, d, l);
name[l] = '/';
strcpy(name+l+1, de->d_name);
} else {
name = de->d_name;
}
if (p2) {
if ((error = match_in_dir(name, p2, flags, errfunc, tail))) {
closedir(dir);
return error;
}
} else {
int mark = 0;
if (flags & GLOB_MARK) {
if (de->d_type && !S_ISLNK(de->d_type<<12))
mark = S_ISDIR(de->d_type<<12);
else {
struct stat st;
stat(name, &st);
mark = S_ISDIR(st.st_mode);
}
}
if (append(tail, name, l+de->d_reclen+1, mark)) {
closedir(dir);
return GLOB_NOSPACE;
}
}
}
closedir(dir);
if (error && (errfunc(d, error) || (flags & GLOB_ERR)))
return GLOB_ABORTED;
return 0;
}
static int ignore_err(const char *path, int err)
{
return 0;
}
static void freelist(struct match *head)
{
struct match *match, *next;
for (match=head->next; match; match=next) {
next = match->next;
free(match);
}
}
static int sort(const void *a, const void *b)
{
return strcmp(*(const char **)a, *(const char **)b);
}
int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
{
const char *p=pat, *d;
struct match head = { .next = NULL }, *tail = &head;
size_t cnt, i;
size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0;
int error = 0;
if (*p == '/') {
for (; *p == '/'; p++);
d = "/";
} else {
d = "";
}
if (!errfunc) errfunc = ignore_err;
if (!(flags & GLOB_APPEND)) {
g->gl_offs = offs;
g->gl_pathc = 0;
g->gl_pathv = NULL;
}
if (strnlen(p, PATH_MAX+1) > PATH_MAX) return GLOB_NOSPACE;
if (*pat) error = match_in_dir(d, p, flags, errfunc, &tail);
if (error == GLOB_NOSPACE) {
freelist(&head);
return error;
}
for (cnt=0, tail=head.next; tail; tail=tail->next, cnt++);
if (!cnt) {
if (flags & GLOB_NOCHECK) {
tail = &head;
if (append(&tail, pat, strlen(pat), 0))
return GLOB_NOSPACE;
cnt++;
} else
return GLOB_NOMATCH;
}
if (flags & GLOB_APPEND) {
char **pathv = realloc(g->gl_pathv, (offs + g->gl_pathc + cnt + 1) * sizeof(char *));
if (!pathv) {
freelist(&head);
return GLOB_NOSPACE;
}
g->gl_pathv = pathv;
offs += g->gl_pathc;
} else {
g->gl_pathv = malloc((offs + cnt + 1) * sizeof(char *));
if (!g->gl_pathv) {
freelist(&head);
return GLOB_NOSPACE;
}
for (i=0; i<offs; i++)
g->gl_pathv[i] = NULL;
}
for (i=0, tail=head.next; i<cnt; tail=tail->next, i++)
g->gl_pathv[offs + i] = tail->name;
g->gl_pathv[offs + i] = NULL;
g->gl_pathc += cnt;
if (!(flags & GLOB_NOSORT))
qsort(g->gl_pathv+offs, cnt, sizeof(char *), sort);
return error;
}
void globfree(glob_t *g)
{
size_t i;
for (i=0; i<g->gl_pathc; i++)
free(g->gl_pathv[g->gl_offs + i] - offsetof(struct match, name));
free(g->gl_pathv);
g->gl_pathc = 0;
g->gl_pathv = NULL;
}

View File

@ -0,0 +1,268 @@
/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunSoft, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*
*/
/* lgamma_r(x, signgamp)
* Reentrant version of the logarithm of the Gamma function
* with user provide pointer for the sign of Gamma(x).
*
* Method:
* 1. Argument Reduction for 0 < x <= 8
* Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
* reduce x to a number in [1.5,2.5] by
* lgamma(1+s) = log(s) + lgamma(s)
* for example,
* lgamma(7.3) = log(6.3) + lgamma(6.3)
* = log(6.3*5.3) + lgamma(5.3)
* = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
* 2. Polynomial approximation of lgamma around its
* minimun ymin=1.461632144968362245 to maintain monotonicity.
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
* Let z = x-ymin;
* lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
* where
* poly(z) is a 14 degree polynomial.
* 2. Rational approximation in the primary interval [2,3]
* We use the following approximation:
* s = x-2.0;
* lgamma(x) = 0.5*s + s*P(s)/Q(s)
* with accuracy
* |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
* Our algorithms are based on the following observation
*
* zeta(2)-1 2 zeta(3)-1 3
* lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ...
* 2 3
*
* where Euler = 0.5771... is the Euler constant, which is very
* close to 0.5.
*
* 3. For x>=8, we have
* lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
* (better formula:
* lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
* Let z = 1/x, then we approximation
* f(z) = lgamma(x) - (x-0.5)(log(x)-1)
* by
* 3 5 11
* w = w0 + w1*z + w2*z + w3*z + ... + w6*z
* where
* |w - f(z)| < 2**-58.74
*
* 4. For negative x, since (G is gamma function)
* -x*G(-x)*G(x) = pi/sin(pi*x),
* we have
* G(x) = pi/(sin(pi*x)*(-x)*G(-x))
* since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
* Hence, for x<0, signgam = sign(sin(pi*x)) and
* lgamma(x) = log(|Gamma(x)|)
* = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
* Note: one should avoid compute pi*(-x) directly in the
* computation of sin(pi*(-x)).
*
* 5. Special Cases
* lgamma(2+s) ~ s*(1-Euler) for tiny s
* lgamma(1) = lgamma(2) = 0
* lgamma(x) ~ -log(|x|) for tiny x
* lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero
* lgamma(inf) = inf
* lgamma(-inf) = inf (bug for bug compatible with C99!?)
*
*/
static const double
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
/* tt = -(tail of tf) */
tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
#include <stdint.h>
#include <math.h>
double lgamma_r(double x, int *signgamp)
{
union {double f; uint64_t i;} u = {x};
double_t t,y,z,nadj=0,p,p1,p2,p3,q,r,w;
uint32_t ix;
int sign,i;
/* purge off +-inf, NaN, +-0, tiny and negative arguments */
*signgamp = 1;
sign = u.i>>63;
ix = u.i>>32 & 0x7fffffff;
if (ix >= 0x7ff00000)
return x*x;
if (ix < (0x3ff-70)<<20) { /* |x|<2**-70, return -log(|x|) */
if(sign) {
x = -x;
*signgamp = -1;
}
return -log(x);
}
if (sign) {
x = -x;
t = sin(pi * x);
if (t == 0.0) /* -integer */
return 1.0/(x-x);
if (t > 0.0)
*signgamp = -1;
else
t = -t;
nadj = log(pi/(t*x));
}
/* purge off 1 and 2 */
if ((ix == 0x3ff00000 || ix == 0x40000000) && (uint32_t)u.i == 0)
r = 0;
/* for x < 2.0 */
else if (ix < 0x40000000) {
if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
r = -log(x);
if (ix >= 0x3FE76944) {
y = 1.0 - x;
i = 0;
} else if (ix >= 0x3FCDA661) {
y = x - (tc-1.0);
i = 1;
} else {
y = x;
i = 2;
}
} else {
r = 0.0;
if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
y = 2.0 - x;
i = 0;
} else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
y = x - tc;
i = 1;
} else {
y = x - 1.0;
i = 2;
}
}
switch (i) {
case 0:
z = y*y;
p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
p = y*p1+p2;
r += (p-0.5*y);
break;
case 1:
z = y*y;
w = z*y;
p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
p = z*p1-(tt-w*(p2+y*p3));
r += tf + p;
break;
case 2:
p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
p2 = 1.0+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
r += -0.5*y + p1/p2;
}
} else if (ix < 0x40200000) { /* x < 8.0 */
i = (int)x;
y = x - (double)i;
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
q = 1.0+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
r = 0.5*y+p/q;
z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
switch (i) {
case 7: z *= y + 6.0; /* FALLTHRU */
case 6: z *= y + 5.0; /* FALLTHRU */
case 5: z *= y + 4.0; /* FALLTHRU */
case 4: z *= y + 3.0; /* FALLTHRU */
case 3: z *= y + 2.0; /* FALLTHRU */
r += log(z);
break;
}
} else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
t = log(x);
z = 1.0/x;
y = z*z;
w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
r = (x-0.5)*(t-1.0)+w;
} else /* 2**58 <= x <= inf */
r = x*(log(x)-1.0);
if (sign)
r = nadj - r;
return r;
}
int signgam;
double lgamma(double x)
{
return lgamma_r(x, &signgam);
}

View File

@ -0,0 +1,249 @@
#ifndef _LIBM_H
#define _LIBM_H
#include <stdint.h>
#include <float.h>
#include <math.h>
#include <endian.h>
#include "musl_features.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t m;
uint16_t se;
} i;
};
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
/* This is the m68k variant of 80-bit long double, and this definition only works
* on archs where the alignment requirement of uint64_t is <= 4. */
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t pad;
uint64_t m;
} i;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
long double f;
struct {
uint64_t lo;
uint32_t mid;
uint16_t top;
uint16_t se;
} i;
struct {
uint64_t lo;
uint64_t hi;
} i2;
};
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
union ldshape {
long double f;
struct {
uint16_t se;
uint16_t top;
uint32_t mid;
uint64_t lo;
} i;
struct {
uint64_t hi;
uint64_t lo;
} i2;
};
#else
#error Unsupported long double representation
#endif
/* Support non-nearest rounding mode. */
#define WANT_ROUNDING 1
/* Support signaling NaNs. */
#define WANT_SNAN 0
#if WANT_SNAN
#error SNaN is unsupported
#else
#define issignalingf_inline(x) 0
#define issignaling_inline(x) 0
#endif
#ifndef TOINT_INTRINSICS
#define TOINT_INTRINSICS 0
#endif
#if TOINT_INTRINSICS
/* Round x to nearest int in all rounding modes, ties have to be rounded
consistently with converttoint so the results match. If the result
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
static double_t roundtoint(double_t);
/* Convert x to nearest int in all rounding modes, ties have to be rounded
consistently with roundtoint. If the result is not representible in an
int32_t then the semantics is unspecified. */
static int32_t converttoint(double_t);
#endif
/* Helps static branch prediction so hot path can be better optimized. */
#ifdef __GNUC__
#define predict_true(x) __builtin_expect(!!(x), 1)
#define predict_false(x) __builtin_expect(x, 0)
#else
#define predict_true(x) (x)
#define predict_false(x) (x)
#endif
/* Evaluate an expression as the specified type. With standard excess
precision handling a type cast or assignment is enough (with
-ffloat-store an assignment is required, in old compilers argument
passing and return statement may not drop excess precision). */
static inline float eval_as_float(float x)
{
float y = x;
return y;
}
static inline double eval_as_double(double x)
{
double y = x;
return y;
}
/* fp_barrier returns its input, but limits code transformations
as if it had a side-effect (e.g. observable io) and returned
an arbitrary value. */
#ifndef fp_barrierf
#define fp_barrierf fp_barrierf
static inline float fp_barrierf(float x)
{
volatile float y = x;
return y;
}
#endif
#ifndef fp_barrier
#define fp_barrier fp_barrier
static inline double fp_barrier(double x)
{
volatile double y = x;
return y;
}
#endif
#ifndef fp_barrierl
#define fp_barrierl fp_barrierl
static inline long double fp_barrierl(long double x)
{
volatile long double y = x;
return y;
}
#endif
/* fp_force_eval ensures that the input value is computed when that's
otherwise unused. To prevent the constant folding of the input
expression, an additional fp_barrier may be needed or a compilation
mode that does so (e.g. -frounding-math in gcc). Then it can be
used to evaluate an expression for its fenv side-effects only. */
#ifndef fp_force_evalf
#define fp_force_evalf fp_force_evalf
static inline void fp_force_evalf(float x)
{
volatile float y;
y = x;
}
#endif
#ifndef fp_force_eval
#define fp_force_eval fp_force_eval
static inline void fp_force_eval(double x)
{
volatile double y;
y = x;
}
#endif
#ifndef fp_force_evall
#define fp_force_evall fp_force_evall
static inline void fp_force_evall(long double x)
{
volatile long double y;
y = x;
}
#endif
#define FORCE_EVAL(x) do { \
if (sizeof(x) == sizeof(float)) { \
fp_force_evalf(x); \
} else if (sizeof(x) == sizeof(double)) { \
fp_force_eval(x); \
} else { \
fp_force_evall(x); \
} \
} while(0)
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
#define EXTRACT_WORDS(hi,lo,d) \
do { \
uint64_t __u = asuint64(d); \
(hi) = __u >> 32; \
(lo) = (uint32_t)__u; \
} while (0)
#define GET_HIGH_WORD(hi,d) \
do { \
(hi) = asuint64(d) >> 32; \
} while (0)
#define GET_LOW_WORD(lo,d) \
do { \
(lo) = (uint32_t)asuint64(d); \
} while (0)
#define INSERT_WORDS(d,hi,lo) \
do { \
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
} while (0)
#define SET_HIGH_WORD(d,hi) \
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
#define SET_LOW_WORD(d,lo) \
INSERT_WORDS(d, asuint64(d)>>32, lo)
#define GET_FLOAT_WORD(w,d) \
do { \
(w) = asuint(d); \
} while (0)
#define SET_FLOAT_WORD(d,w) \
do { \
(d) = asfloat(w); \
} while (0)
extern int __signgam;
hidden double __lgamma_r(double, int *);
hidden float __lgammaf_r(float, int *);
/* error handling functions */
hidden float __math_xflowf(uint32_t, float);
hidden float __math_uflowf(uint32_t);
hidden float __math_oflowf(uint32_t);
hidden float __math_divzerof(uint32_t);
hidden float __math_invalidf(float);
hidden double __math_xflow(uint32_t, double);
hidden double __math_uflow(uint32_t);
hidden double __math_oflow(uint32_t);
hidden double __math_divzero(uint32_t);
hidden double __math_invalid(double);
#endif

View File

@ -0,0 +1,112 @@
/*
* Double-precision log(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log_data.h"
#define T __log_data.tab
#define T2 __log_data.tab2
#define B __log_data.poly1
#define A __log_data.poly
#define Ln2hi __log_data.ln2hi
#define Ln2lo __log_data.ln2lo
#define N (1 << LOG_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t top16(double x)
{
return asuint64(x) >> 48;
}
double log(double x)
{
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = top16(x);
#define LO asuint64(1.0 - 0x1p-4)
#define HI asuint64(1.0 + 0x1.09p-4)
if (predict_false(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
y = r3 *
(B[1] + r * B[2] + r2 * B[3] +
r3 * (B[4] + r * B[5] + r2 * B[6] +
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
/* Worst-case error is around 0.507 ULP. */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
return eval_as_double(y);
}
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t)k;
/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;
/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
y = lo + r2 * A[0] +
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
return eval_as_double(y);
}

View File

@ -0,0 +1,122 @@
/*
* Double-precision log2(x) function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log2_data.h"
#define T __log2_data.tab
#define T2 __log2_data.tab2
#define B __log2_data.poly1
#define A __log2_data.poly
#define InvLn2hi __log2_data.invln2hi
#define InvLn2lo __log2_data.invln2lo
#define N (1 << LOG2_TABLE_BITS)
#define OFF 0x3fe6000000000000
/* Top 16 bits of a double. */
static inline uint32_t top16(double x)
{
return asuint64(x) >> 48;
}
double log2(double x)
{
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;
ix = asuint64(x);
top = top16(x);
#define LO asuint64(1.0 - 0x1.5b51p-5)
#define HI asuint64(1.0 + 0x1.6ab2p-5)
if (predict_false(ix - LO < HI - LO)) {
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
return 0;
r = x - 1.0;
#if __FP_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
return eval_as_double(y);
}
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
/* x < 0x1p-1022 or inf or nan. */
if (ix * 2 == 0)
return __math_divzero(1);
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
return x;
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid(x);
/* x is subnormal, normalize it. */
ix = asuint64(x * 0x1p52);
ix -= 52ULL << 52;
}
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble(iz);
kd = (double_t)k;
/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if __FP_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = __builtin_fma(z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
rhi = asdouble(asuint64(r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif
/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;
/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
return eval_as_double(y);
}

View File

@ -0,0 +1,201 @@
/*
* Data for log2.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "log2_data.h"
#define N (1 << LOG2_TABLE_BITS)
const struct log2_data __log2_data = {
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
.invln2hi = 0x1.7154765200000p+0,
.invln2lo = 0x1.705fc2eefa200p-33,
.poly1 = {
// relative error: 0x1.2fad8188p-63
// in -0x1.5b51p-5 0x1.6ab2p-5
-0x1.71547652b82fep-1,
0x1.ec709dc3a03f7p-2,
-0x1.71547652b7c3fp-2,
0x1.2776c50f05be4p-2,
-0x1.ec709dd768fe5p-3,
0x1.a61761ec4e736p-3,
-0x1.7153fbc64a79bp-3,
0x1.484d154f01b4ap-3,
-0x1.289e4a72c383cp-3,
0x1.0b32f285aee66p-3,
},
.poly = {
// relative error: 0x1.a72c2bf8p-58
// abs error: 0x1.67a552c8p-66
// in -0x1.f45p-8 0x1.f45p-8
-0x1.71547652b8339p-1,
0x1.ec709dc3a04bep-2,
-0x1.7154764702ffbp-2,
0x1.2776c50034c48p-2,
-0x1.ec7b328ea92bcp-3,
0x1.a6225e117f92ep-3,
},
/* Algorithm:
x = 2^k z
log2(x) = k + log2(c) + log2(z/c)
log2(z/c) = poly(z/c - 1)
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log2(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p10 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-64 and
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
Note: 1) ensures that k + logc can be computed without rounding error, 2)
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
},
#if !__FP_FAST_FMA
.tab2 = {
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
},
#endif
};

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOG2_DATA_H
#define _LOG2_DATA_H
#include "musl_features.h"
#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
extern hidden const struct log2_data {
double invln2hi;
double invln2lo;
double poly[LOG2_POLY_ORDER - 1];
double poly1[LOG2_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG2_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data;
#endif

View File

@ -0,0 +1,72 @@
/*
* Single-precision log2 function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log2f_data.h"
/*
LOG2F_TABLE_BITS = 4
LOG2F_POLY_ORDER = 4
ULP error: 0.752 (nearest rounding.)
Relative error: 1.9 * 2^-26 (before rounding.)
*/
#define N (1 << LOG2F_TABLE_BITS)
#define T __log2f_data.tab
#define A __log2f_data.poly
#define OFF 0x3f330000
float log2f(float x)
{
double_t z, r, r2, p, y, y0, invc, logc;
uint32_t ix, iz, top, tmp;
int k, i;
ix = asuint(x);
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
return 0;
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof(1);
if (ix == 0x7f800000) /* log2(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)tmp >> 23; /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
p = A[3] * r + y0;
y = y * r2 + p;
return eval_as_float(y);
}

View File

@ -0,0 +1,33 @@
/*
* Data definition for log2f.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "log2f_data.h"
const struct log2f_data __log2f_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
},
.poly = {
-0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
0x1.715475f35c8b8p0,
}
};

View File

@ -0,0 +1,19 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOG2F_DATA_H
#define _LOG2F_DATA_H
#include "musl_features.h"
#define LOG2F_TABLE_BITS 4
#define LOG2F_POLY_ORDER 4
extern hidden const struct log2f_data {
struct {
double invc, logc;
} tab[1 << LOG2F_TABLE_BITS];
double poly[LOG2F_POLY_ORDER];
} __log2f_data;
#endif

View File

@ -0,0 +1,328 @@
/*
* Data for log.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "log_data.h"
#define N (1 << LOG_TABLE_BITS)
const struct log_data __log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly1 = {
// relative error: 0x1.c04d76cp-63
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
-0x1p-1,
0x1.5555555555577p-2,
-0x1.ffffffffffdcbp-3,
0x1.999999995dd0cp-3,
-0x1.55555556745a7p-3,
0x1.24924a344de3p-3,
-0x1.fffffa4423d65p-4,
0x1.c7184282ad6cap-4,
-0x1.999eb43b068ffp-4,
0x1.78182f7afd085p-4,
-0x1.5521375d145cdp-4,
},
.poly = {
// relative error: 0x1.926199e8p-56
// abs error: 0x1.882ff33p-65
// in -0x1.fp-9 0x1.fp-9
-0x1.0000000000001p-1,
0x1.555555551305bp-2,
-0x1.fffffffeb459p-3,
0x1.999b324f10111p-3,
-0x1.55575e506c89fp-3,
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p9 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-66 and
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
.tab = {
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
{0x1.008040614b195p+0, -0x1.0040979240000p-9},
{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
},
#if !__FP_FAST_FMA
.tab2 = {
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
},
#endif
};

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOG_DATA_H
#define _LOG_DATA_H
#include "musl_features.h"
#define LOG_TABLE_BITS 7
#define LOG_POLY_ORDER 6
#define LOG_POLY1_ORDER 12
extern hidden const struct log_data {
double ln2hi;
double ln2lo;
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
double poly1[LOG_POLY1_ORDER - 1];
struct {
double invc, logc;
} tab[1 << LOG_TABLE_BITS];
#if !__FP_FAST_FMA
struct {
double chi, clo;
} tab2[1 << LOG_TABLE_BITS];
#endif
} __log_data;
#endif

View File

@ -0,0 +1,71 @@
/*
* Single-precision log function.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "logf_data.h"
/*
LOGF_TABLE_BITS = 4
LOGF_POLY_ORDER = 4
ULP error: 0.818 (nearest rounding.)
Relative error: 1.957 * 2^-26 (before rounding.)
*/
#define T __logf_data.tab
#define A __logf_data.poly
#define Ln2 __logf_data.ln2
#define N (1 << LOGF_TABLE_BITS)
#define OFF 0x3f330000
float logf(float x)
{
double_t z, r, r2, y, y0, invc, logc;
uint32_t ix, iz, tmp;
int k, i;
ix = asuint(x);
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
return 0;
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return __math_divzerof(1);
if (ix == 0x7f800000) /* log(inf) == inf. */
return x;
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t)tmp >> 23; /* arithmetic shift */
iz = ix - (tmp & 0x1ff << 23);
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
r = z * invc - 1;
y0 = logc + (double_t)k * Ln2;
/* Pipelined polynomial evaluation to approximate log1p(r). */
r2 = r * r;
y = A[1] * r + A[2];
y = A[0] * r2 + y;
y = y * r2 + (y0 + r);
return eval_as_float(y);
}

View File

@ -0,0 +1,33 @@
/*
* Data definition for logf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "logf_data.h"
const struct logf_data __logf_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
{ 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
{ 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
{ 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
{ 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
{ 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
{ 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
{ 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
{ 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
{ 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
{ 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
},
.ln2 = 0x1.62e42fefa39efp-1,
.poly = {
-0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
}
};

View File

@ -0,0 +1,20 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _LOGF_DATA_H
#define _LOGF_DATA_H
#include "musl_features.h"
#define LOGF_TABLE_BITS 4
#define LOGF_POLY_ORDER 4
extern hidden const struct logf_data {
struct {
double invc, logc;
} tab[1 << LOGF_TABLE_BITS];
double ln2;
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
} __logf_data;
#endif

View File

@ -0,0 +1,5 @@
#pragma once
#define weak __attribute__((__weak__))
#define hidden __attribute__((__visibility__("hidden")))
#define predict_false(x) __builtin_expect(x, 0)

View File

@ -0,0 +1,23 @@
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <syscall.h>
#include "syscall.h"
int pipe2(int fd[2], int flag)
{
if (!flag) return pipe(fd);
int ret = __syscall(SYS_pipe2, fd, flag);
if (ret != -ENOSYS) return __syscall_ret(ret);
ret = pipe(fd);
if (ret) return ret;
if (flag & O_CLOEXEC) {
__syscall(SYS_fcntl, fd[0], F_SETFD, FD_CLOEXEC);
__syscall(SYS_fcntl, fd[1], F_SETFD, FD_CLOEXEC);
}
if (flag & O_NONBLOCK) {
__syscall(SYS_fcntl, fd[0], F_SETFL, O_NONBLOCK);
__syscall(SYS_fcntl, fd[1], F_SETFL, O_NONBLOCK);
}
return 0;
}

View File

@ -0,0 +1,107 @@
/// Very limited implementation. Half of code from Musl was cut.
/// This is Ok, because for now, this function is used only from clang driver.
#define _GNU_SOURCE
#include <spawn.h>
#include <sched.h>
#include <unistd.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <syscall.h>
#include <sys/signal.h>
#include <pthread.h>
#include <spawn.h>
#include <errno.h>
#include "syscall.h"
struct args {
int p[2];
sigset_t oldmask;
const char *path;
int (*exec)(const char *, char *const *, char *const *);
const posix_spawn_file_actions_t *fa;
const posix_spawnattr_t *restrict attr;
char *const *argv, *const *envp;
};
void __get_handler_set(sigset_t *);
static int child(void *args_vp)
{
int ret;
struct args *args = args_vp;
int p = args->p[1];
const posix_spawnattr_t *restrict attr = args->attr;
close(args->p[0]);
/* Close-on-exec flag may have been lost if we moved the pipe
* to a different fd. We don't use F_DUPFD_CLOEXEC above because
* it would fail on older kernels and atomicity is not needed --
* in this process there are no threads or signal handlers. */
__syscall(SYS_fcntl, p, F_SETFD, FD_CLOEXEC);
pthread_sigmask(SIG_SETMASK, (attr->__flags & POSIX_SPAWN_SETSIGMASK)
? &attr->__ss : &args->oldmask, 0);
args->exec(args->path, args->argv, args->envp);
ret = -errno;
/* Since sizeof errno < PIPE_BUF, the write is atomic. */
ret = -ret;
if (ret) while (__syscall(SYS_write, p, &ret, sizeof ret) < 0);
_exit(127);
}
int __posix_spawnx(pid_t *restrict res, const char *restrict path,
int (*exec)(const char *, char *const *, char *const *),
const posix_spawn_file_actions_t *fa,
const posix_spawnattr_t *restrict attr,
char *const argv[restrict], char *const envp[restrict])
{
pid_t pid;
char stack[1024];
int ec=0, cs;
struct args args;
if (pipe2(args.p, O_CLOEXEC))
return errno;
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
args.path = path;
args.exec = exec;
args.fa = fa;
args.attr = attr ? attr : &(const posix_spawnattr_t){0};
args.argv = argv;
args.envp = envp;
pid = clone(child, stack+sizeof stack,
CLONE_VM|CLONE_VFORK|SIGCHLD, &args);
close(args.p[1]);
if (pid > 0) {
if (read(args.p[0], &ec, sizeof ec) != sizeof ec) ec = 0;
else waitpid(pid, &(int){0}, 0);
} else {
ec = -pid;
}
close(args.p[0]);
if (!ec && res) *res = pid;
pthread_setcancelstate(cs, 0);
return ec;
}
int posix_spawn(pid_t *restrict res, const char *restrict path,
const posix_spawn_file_actions_t *fa,
const posix_spawnattr_t *restrict attr,
char *const argv[restrict], char *const envp[restrict])
{
return __posix_spawnx(res, path, execve, fa, attr, argv, envp);
}

View File

@ -0,0 +1,343 @@
/*
* Double-precision x^y function.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"
#include "pow_data.h"
/*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*/
#define T __pow_log_data.tab
#define A __pow_log_data.poly
#define Ln2hi __pow_log_data.ln2hi
#define Ln2lo __pow_log_data.ln2lo
#define N (1 << POW_LOG_TABLE_BITS)
#define OFF 0x3fe6955500000000
/* Top 12 bits of a double (sign and exponent bits). */
static inline uint32_t top12(double x)
{
return asuint64(x) >> 52;
}
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */
static inline double_t log_inline(uint64_t ix, double_t *tail)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t)tmp >> 52; /* arithmetic shift */
iz = ix - (tmp & 0xfffULL << 52);
z = asdouble(iz);
kd = (double_t)k;
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc;
logc = T[i].logc;
logctail = T[i].logctail;
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
#if __FP_FAST_FMA
r = __builtin_fma(z, invc, -1.0);
#else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc;
r = rhi + rlo;
#endif
/* k*Ln2 + log(c) + r. */
t1 = kd * Ln2hi + logc;
t2 = t1 + r;
lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r;
/* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
#if __FP_FAST_FMA
hi = t2 + ar2;
lo3 = __builtin_fma(ar, r, -ar2);
lo4 = t2 - hi + ar2;
#else
double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi;
hi = t2 + arhi2;
lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2;
#endif
/* p = log1p(r) - r - A[0]*r*r. */
p = (ar3 * (A[1] + r * A[2] +
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
*tail = hi - y + lo;
return y;
}
#undef N
#undef T
#define N (1 << EXP_TABLE_BITS)
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift __exp_data.shift
#define T __exp_data.tab
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflown into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;
if ((ki & 0x80000000) == 0) {
/* k > 0, the exponent of scale might have overflowed by <= 460. */
sbits -= 1009ull << 52;
scale = asdouble(sbits);
y = 0x1p1009 * (scale + scale * tmp);
return eval_as_double(y);
}
/* k < 0, need special care in the subnormal range. */
sbits += 1022ull << 52;
/* Note: sbits is signed scale. */
scale = asdouble(sbits);
y = scale + scale * tmp;
if (fabs(y) < 1.0) {
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0;
if (y < 0.0)
one = -1.0;
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
y = eval_as_double(hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble(sbits & 0x8000000000000000);
/* The underflow exception needs to be signaled explicitly. */
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return eval_as_double(y);
}
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;
abstop = top12(x) & 0x7ff;
if (predict_false(abstop - top12(0x1p-54) >=
top12(512.0) - top12(0x1p-54))) {
if (abstop - top12(0x1p-54) >= 0x80000000) {
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one;
}
if (abstop >= top12(1024.0)) {
/* Note: inf and nan are already handled. */
if (asuint64(x) >> 63)
return __math_uflow(sign_bias);
else
return __math_oflow(sign_bias);
}
/* Large x is special cased below. */
abstop = 0;
}
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint(z);
ki = converttoint(z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd) >> 16;
kd = (double_t)(int32_t)ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double(z + Shift);
ki = asuint64(kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble(T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
if (predict_false(abstop == 0))
return specialcase(tmp, sbits, ki);
scale = asdouble(sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double(scale + scale * tmp);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint64_t iy)
{
int e = iy >> 52 & 0x7ff;
if (e < 0x3ff)
return 0;
if (e > 0x3ff + 52)
return 2;
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
return 0;
if (iy & (1ULL << (0x3ff + 52 - e)))
return 1;
return 2;
}
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
static inline int zeroinfnan(uint64_t i)
{
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
}
double pow(double x, double y)
{
uint32_t sign_bias = 0;
uint64_t ix, iy;
uint32_t topx, topy;
ix = asuint64(x);
iy = asuint64(y);
topx = top12(x);
topy = top12(y);
if (predict_false(topx - 0x001 >= 0x7ff - 0x001 ||
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
/* Special cases: (x < 0x1p-126 or inf or nan) or
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (predict_false(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignaling_inline(x) ? x + y : 1.0;
if (ix == asuint64(1.0))
return issignaling_inline(y) ? x + y : 1.0;
if (2 * ix > 2 * asuint64(INFINITY) ||
2 * iy > 2 * asuint64(INFINITY))
return x + y;
if (2 * ix == 2 * asuint64(1.0))
return 1.0;
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (predict_false(zeroinfnan(ix))) {
double_t x2 = x * x;
if (ix >> 63 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? fp_barrier(1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalid(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64(1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be) {
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64(1.0) ? 1.0 + y :
1.0 - y;
else
return 1.0;
}
return (ix > asuint64(1.0)) == (topy < 0x800) ?
__math_oflow(0) :
__math_uflow(0);
}
if (topx == 0) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint64(x * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
}
double_t lo;
double_t hi = log_inline(ix, &lo);
double_t ehi, elo;
#if __FP_FAST_FMA
ehi = y * hi;
elo = y * lo + __builtin_fma(y, hi, -ehi);
#else
double_t yhi = asdouble(iy & -1ULL << 27);
double_t ylo = y - yhi;
double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
double_t llo = hi - lhi + lo;
ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif
return exp_inline(ehi, elo, sign_bias);
}

View File

@ -0,0 +1,180 @@
/*
* Data for the log part of pow.
*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "pow_data.h"
#define N (1 << POW_LOG_TABLE_BITS)
const struct pow_log_data __pow_log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly = {
// relative error: 0x1.11922ap-70
// in -0x1.6bp-8 0x1.6bp-8
// Coefficients are scaled to match the scaling during evaluation.
-0x1p-1,
0x1.555555555556p-2 * -2,
-0x1.0000000000006p-2 * -2,
0x1.999999959554ep-3 * 4,
-0x1.555555529a47ap-3 * 4,
0x1.2495b9b4845e9p-3 * -8,
-0x1.0002b8b263fc3p-3 * -8,
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
and z falls into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = round(0x1p43*log(c))/0x1p43
tab[i].logctail = (double)(log(c) - logc)
where c is chosen near the center of the subinterval such that 1/c has only a
few precision bits so z/c - 1 is exactly representible as double:
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
error and the interval for z is selected such that near x == 1, where log(x)
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
.tab = {
#define A(a, b, c) {a, 0, b, c},
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
},
};

View File

@ -0,0 +1,22 @@
/*
* Copyright (c) 2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _POW_DATA_H
#define _POW_DATA_H
#include "musl_features.h"
#define POW_LOG_TABLE_BITS 7
#define POW_LOG_POLY_ORDER 8
extern hidden const struct pow_log_data {
double ln2hi;
double ln2lo;
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
/* Note: the pad field is unused, but allows slightly faster indexing. */
struct {
double invc, pad, logc, logctail;
} tab[1 << POW_LOG_TABLE_BITS];
} __pow_log_data;
#endif

View File

@ -0,0 +1,185 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
#include "powf_data.h"
/*
POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5
ULP error: 0.82 (~ 0.5 + relerr*2^24)
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*/
#define N (1 << POWF_LOG2_TABLE_BITS)
#define T __powf_log2_data.tab
#define A __powf_log2_data.poly
#define OFF 0x3f330000
/* Subnormal input is normalized so ix has negative biased exponent.
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
static inline double_t log2_inline(uint32_t ix)
{
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
uint32_t iz, top, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[0] * r + A[1];
p = A[2] * r + A[3];
r4 = r2 * r2;
q = A[4] * r + y0;
q = p * r2 + q;
y = y * r4 + q;
return y;
}
#undef N
#undef T
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
/* The output of log2 and thus the input of exp2 is either scaled by N
(in case of fast toint intrinsics) or not. The unscaled xd must be
in [-1021,1023], sign_bias sets the sign of the result. */
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
{
uint64_t ki, ski, t;
double_t kd, z, r, r2, y, s;
#if TOINT_INTRINSICS
#define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint(xd); /* k */
ki = converttoint(xd);
#else
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N */
#endif
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint32_t iy)
{
int e = iy >> 23 & 0xff;
if (e < 0x7f)
return 0;
if (e > 0x7f + 23)
return 2;
if (iy & ((1 << (0x7f + 23 - e)) - 1))
return 0;
if (iy & (1 << (0x7f + 23 - e)))
return 1;
return 2;
}
static inline int zeroinfnan(uint32_t ix)
{
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
}
float powf(float x, float y)
{
uint32_t sign_bias = 0;
uint32_t ix, iy;
ix = asuint(x);
iy = asuint(y);
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
zeroinfnan(iy))) {
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (predict_false(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignalingf_inline(x) ? x + y : 1.0f;
if (ix == 0x3f800000)
return issignalingf_inline(y) ? x + y : 1.0f;
if (2 * ix > 2u * 0x7f800000 ||
2 * iy > 2u * 0x7f800000)
return x + y;
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (predict_false(zeroinfnan(ix))) {
float_t x2 = x * x;
if (ix & 0x80000000 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalidf(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint(x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline(ix);
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
asuint64(126.0 * POWF_SCALE) >> 47)) {
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
return __math_oflowf(sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf(sign_bias);
}
return exp2_inline(ylogx, sign_bias);
}

View File

@ -0,0 +1,34 @@
/*
* Data definition for powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "powf_data.h"
const struct powf_log2_data __powf_log2_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
},
.poly = {
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
0x1.71547652ab82bp0 * POWF_SCALE,
}
};

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _POWF_DATA_H
#define _POWF_DATA_H
#include "libm.h"
#include "exp2f_data.h"
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
#define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
extern hidden const struct powf_log2_data {
struct {
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data;
#endif

View File

@ -0,0 +1,522 @@
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl.c
*
* Power function, long double precision
*
*
* SYNOPSIS:
*
* long double x, y, z, powl();
*
* z = powl( x, y );
*
*
* DESCRIPTION:
*
* Computes x raised to the yth power. Analytically,
*
* x**y = exp( y log(x) ).
*
* Following Cody and Waite, this program uses a lookup table
* of 2**-i/32 and pseudo extended precision arithmetic to
* obtain several extra bits of accuracy in both the logarithm
* and the exponential.
*
*
* ACCURACY:
*
* The relative error of pow(x,y) can be estimated
* by y dl ln(2), where dl is the absolute error of
* the internally computed base 2 logarithm. At the ends
* of the approximation interval the logarithm equal 1/32
* and its relative error is about 1 lsb = 1.1e-19. Hence
* the predicted relative error in the result is 2.3e-21 y .
*
* Relative error:
* arithmetic domain # trials peak rms
*
* IEEE +-1000 40000 2.8e-18 3.7e-19
* .001 < x < 1000, with log(x) uniformly distributed.
* -1000 < y < 1000, y uniformly distributed.
*
* IEEE 0,8700 60000 6.5e-18 1.0e-18
* 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
*
*
* ERROR MESSAGES:
*
* message condition value returned
* pow overflow x**y > MAXNUM INFINITY
* pow underflow x**y < 1/MAXNUM 0.0
* pow domain x<0 and y noninteger 0.0
*
*/
#include "libm.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double powl(long double x, long double y)
{
return pow(x, y);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Table size */
#define NXT 32
/* log(1+x) = x - .5x^2 + x^3 * P(z)/Q(z)
* on the domain 2^(-1/32) - 1 <= x <= 2^(1/32) - 1
*/
static const long double P[] = {
8.3319510773868690346226E-4L,
4.9000050881978028599627E-1L,
1.7500123722550302671919E0L,
1.4000100839971580279335E0L,
};
static const long double Q[] = {
/* 1.0000000000000000000000E0L,*/
5.2500282295834889175431E0L,
8.4000598057587009834666E0L,
4.2000302519914740834728E0L,
};
/* A[i] = 2^(-i/32), rounded to IEEE long double precision.
* If i is even, A[i] + B[i/2] gives additional accuracy.
*/
static const long double A[33] = {
1.0000000000000000000000E0L,
9.7857206208770013448287E-1L,
9.5760328069857364691013E-1L,
9.3708381705514995065011E-1L,
9.1700404320467123175367E-1L,
8.9735453750155359320742E-1L,
8.7812608018664974155474E-1L,
8.5930964906123895780165E-1L,
8.4089641525371454301892E-1L,
8.2287773907698242225554E-1L,
8.0524516597462715409607E-1L,
7.8799042255394324325455E-1L,
7.7110541270397041179298E-1L,
7.5458221379671136985669E-1L,
7.3841307296974965571198E-1L,
7.2259040348852331001267E-1L,
7.0710678118654752438189E-1L,
6.9195494098191597746178E-1L,
6.7712777346844636413344E-1L,
6.6261832157987064729696E-1L,
6.4841977732550483296079E-1L,
6.3452547859586661129850E-1L,
6.2092890603674202431705E-1L,
6.0762367999023443907803E-1L,
5.9460355750136053334378E-1L,
5.8186242938878875689693E-1L,
5.6939431737834582684856E-1L,
5.5719337129794626814472E-1L,
5.4525386633262882960438E-1L,
5.3357020033841180906486E-1L,
5.2213689121370692017331E-1L,
5.1094857432705833910408E-1L,
5.0000000000000000000000E-1L,
};
static const long double B[17] = {
0.0000000000000000000000E0L,
2.6176170809902549338711E-20L,
-1.0126791927256478897086E-20L,
1.3438228172316276937655E-21L,
1.2207982955417546912101E-20L,
-6.3084814358060867200133E-21L,
1.3164426894366316434230E-20L,
-1.8527916071632873716786E-20L,
1.8950325588932570796551E-20L,
1.5564775779538780478155E-20L,
6.0859793637556860974380E-21L,
-2.0208749253662532228949E-20L,
1.4966292219224761844552E-20L,
3.3540909728056476875639E-21L,
-8.6987564101742849540743E-22L,
-1.2327176863327626135542E-20L,
0.0000000000000000000000E0L,
};
/* 2^x = 1 + x P(x),
* on the interval -1/32 <= x <= 0
*/
static const long double R[] = {
1.5089970579127659901157E-5L,
1.5402715328927013076125E-4L,
1.3333556028915671091390E-3L,
9.6181291046036762031786E-3L,
5.5504108664798463044015E-2L,
2.4022650695910062854352E-1L,
6.9314718055994530931447E-1L,
};
#define MEXP (NXT*16384.0L)
/* The following if denormal numbers are supported, else -MEXP: */
#define MNEXP (-NXT*(16384.0L+64.0L))
/* log2(e) - 1 */
#define LOG2EA 0.44269504088896340735992L
#define F W
#define Fa Wa
#define Fb Wb
#define G W
#define Ga Wa
#define Gb u
#define H W
#define Ha Wb
#define Hb Wb
static const long double MAXLOGL = 1.1356523406294143949492E4L;
static const long double MINLOGL = -1.13994985314888605586758E4L;
static const long double LOGE2L = 6.9314718055994530941723E-1L;
static const long double huge = 0x1p10000L;
/* XXX Prevent gcc from erroneously constant folding this. */
static const volatile long double twom10000 = 0x1p-10000L;
static long double reducl(long double);
static long double powil(long double, int);
long double powl(long double x, long double y)
{
/* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
int i, nflg, iyflg, yoddint;
long e;
volatile long double z=0;
long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0;
/* make sure no invalid exception is raised by nan comparision */
if (isnan(x)) {
if (!isnan(y) && y == 0.0)
return 1.0;
return x;
}
if (isnan(y)) {
if (x == 1.0)
return 1.0;
return y;
}
if (x == 1.0)
return 1.0; /* 1**y = 1, even if y is nan */
if (x == -1.0 && !isfinite(y))
return 1.0; /* -1**inf = 1 */
if (y == 0.0)
return 1.0; /* x**0 = 1, even if x is nan */
if (y == 1.0)
return x;
if (y >= LDBL_MAX) {
if (x > 1.0 || x < -1.0)
return INFINITY;
if (x != 0.0)
return 0.0;
}
if (y <= -LDBL_MAX) {
if (x > 1.0 || x < -1.0)
return 0.0;
if (x != 0.0 || y == -INFINITY)
return INFINITY;
}
if (x >= LDBL_MAX) {
if (y > 0.0)
return INFINITY;
return 0.0;
}
w = floorl(y);
/* Set iyflg to 1 if y is an integer. */
iyflg = 0;
if (w == y)
iyflg = 1;
/* Test for odd integer y. */
yoddint = 0;
if (iyflg) {
ya = fabsl(y);
ya = floorl(0.5 * ya);
yb = 0.5 * fabsl(w);
if( ya != yb )
yoddint = 1;
}
if (x <= -LDBL_MAX) {
if (y > 0.0) {
if (yoddint)
return -INFINITY;
return INFINITY;
}
if (y < 0.0) {
if (yoddint)
return -0.0;
return 0.0;
}
}
nflg = 0; /* (x<0)**(odd int) */
if (x <= 0.0) {
if (x == 0.0) {
if (y < 0.0) {
if (signbit(x) && yoddint)
/* (-0.0)**(-odd int) = -inf, divbyzero */
return -1.0/0.0;
/* (+-0.0)**(negative) = inf, divbyzero */
return 1.0/0.0;
}
if (signbit(x) && yoddint)
return -0.0;
return 0.0;
}
if (iyflg == 0)
return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
/* (x<0)**(integer) */
if (yoddint)
nflg = 1; /* negate result */
x = -x;
}
/* (+integer)**(integer) */
if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
w = powil(x, (int)y);
return nflg ? -w : w;
}
/* separate significand from exponent */
x = frexpl(x, &i);
e = i;
/* find significand in antilog table A[] */
i = 1;
if (x <= A[17])
i = 17;
if (x <= A[i+8])
i += 8;
if (x <= A[i+4])
i += 4;
if (x <= A[i+2])
i += 2;
if (x >= A[1])
i = -1;
i += 1;
/* Find (x - A[i])/A[i]
* in order to compute log(x/A[i]):
*
* log(x) = log( a x/a ) = log(a) + log(x/a)
*
* log(x/a) = log(1+v), v = x/a - 1 = (x-a)/a
*/
x -= A[i];
x -= B[i/2];
x /= A[i];
/* rational approximation for log(1+v):
*
* log(1+v) = v - v**2/2 + v**3 P(v) / Q(v)
*/
z = x*x;
w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3));
w = w - 0.5*z;
/* Convert to base 2 logarithm:
* multiply by log2(e) = 1 + LOG2EA
*/
z = LOG2EA * w;
z += w;
z += LOG2EA * x;
z += x;
/* Compute exponent term of the base 2 logarithm. */
w = -i;
w /= NXT;
w += e;
/* Now base 2 log of x is w + z. */
/* Multiply base 2 log by y, in extended precision. */
/* separate y into large part ya
* and small part yb less than 1/NXT
*/
ya = reducl(y);
yb = y - ya;
/* (w+z)(ya+yb)
* = w*ya + w*yb + z*y
*/
F = z * y + w * yb;
Fa = reducl(F);
Fb = F - Fa;
G = Fa + w * ya;
Ga = reducl(G);
Gb = G - Ga;
H = Fb + Gb;
Ha = reducl(H);
w = (Ga + Ha) * NXT;
/* Test the power of 2 for overflow */
if (w > MEXP)
return huge * huge; /* overflow */
if (w < MNEXP)
return twom10000 * twom10000; /* underflow */
e = w;
Hb = H - Ha;
if (Hb > 0.0) {
e += 1;
Hb -= 1.0/NXT; /*0.0625L;*/
}
/* Now the product y * log2(x) = Hb + e/NXT.
*
* Compute base 2 exponential of Hb,
* where -0.0625 <= Hb <= 0.
*/
z = Hb * __polevll(Hb, R, 6); /* z = 2**Hb - 1 */
/* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
* Find lookup table entry for the fractional power of 2.
*/
if (e < 0)
i = 0;
else
i = 1;
i = e/NXT + i;
e = NXT*i - e;
w = A[e];
z = w * z; /* 2**-e * ( 1 + (2**Hb-1) ) */
z = z + w;
z = scalbnl(z, i); /* multiply by integer power of 2 */
if (nflg)
z = -z;
return z;
}
/* Find a multiple of 1/NXT that is within 1/NXT of x. */
static long double reducl(long double x)
{
long double t;
t = x * NXT;
t = floorl(t);
t = t / NXT;
return t;
}
/*
* Positive real raised to integer power, long double precision
*
*
* SYNOPSIS:
*
* long double x, y, powil();
* int n;
*
* y = powil( x, n );
*
*
* DESCRIPTION:
*
* Returns argument x>0 raised to the nth power.
* The routine efficiently decomposes n as a sum of powers of
* two. The desired power is a product of two-to-the-kth
* powers of x. Thus to compute the 32767 power of x requires
* 28 multiplications instead of 32767 multiplications.
*
*
* ACCURACY:
*
* Relative error:
* arithmetic x domain n domain # trials peak rms
* IEEE .001,1000 -1022,1023 50000 4.3e-17 7.8e-18
* IEEE 1,2 -1022,1023 20000 3.9e-17 7.6e-18
* IEEE .99,1.01 0,8700 10000 3.6e-16 7.2e-17
*
* Returns MAXNUM on overflow, zero on underflow.
*/
static long double powil(long double x, int nn)
{
long double ww, y;
long double s;
int n, e, sign, lx;
if (nn == 0)
return 1.0;
if (nn < 0) {
sign = -1;
n = -nn;
} else {
sign = 1;
n = nn;
}
/* Overflow detection */
/* Calculate approximate logarithm of answer */
s = x;
s = frexpl( s, &lx);
e = (lx - 1)*n;
if ((e == 0) || (e > 64) || (e < -64)) {
s = (s - 7.0710678118654752e-1L) / (s + 7.0710678118654752e-1L);
s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L;
} else {
s = LOGE2L * e;
}
if (s > MAXLOGL)
return huge * huge; /* overflow */
if (s < MINLOGL)
return twom10000 * twom10000; /* underflow */
/* Handle tiny denormal answer, but with less accuracy
* since roundoff error in 1.0/x will be amplified.
* The precise demarcation should be the gradual underflow threshold.
*/
if (s < -MAXLOGL+2.0) {
x = 1.0/x;
sign = -sign;
}
/* First bit of the power */
if (n & 1)
y = x;
else
y = 1.0;
ww = x;
n >>= 1;
while (n) {
ww = ww * ww; /* arg to the 2-to-the-kth power */
if (n & 1) /* if that bit is set, then include in product */
y *= ww;
n >>= 1;
}
if (sign < 0)
y = 1.0/y;
return y;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double powl(long double x, long double y)
{
return pow(x, y);
}
#endif

View File

@ -0,0 +1,11 @@
#define _DEFAULT_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <syscall.h>
#include "syscall.h"
ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
{
/// There was cancellable syscall (syscall_cp), but I don't care.
return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32));
}

View File

@ -0,0 +1,11 @@
#define _GNU_SOURCE
#include <sched.h>
int __sched_cpucount(size_t size, const cpu_set_t *set)
{
size_t i, j, cnt=0;
const unsigned char *p = (const void *)set;
for (i=0; i<size; i++) for (j=0; j<8; j++)
if (p[i] & (1<<j)) cnt++;
return cnt;
}

View File

@ -0,0 +1,42 @@
#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include "syscall.h"
#include "atomic.h"
#ifdef VDSO_GETCPU_SYM
static void *volatile vdso_func;
typedef long (*getcpu_f)(unsigned *, unsigned *, void *);
static long getcpu_init(unsigned *cpu, unsigned *node, void *unused)
{
void *p = __vdsosym(VDSO_GETCPU_VER, VDSO_GETCPU_SYM);
getcpu_f f = (getcpu_f)p;
a_cas_p(&vdso_func, (void *)getcpu_init, p);
return f ? f(cpu, node, unused) : -ENOSYS;
}
static void *volatile vdso_func = (void *)getcpu_init;
#endif
int sched_getcpu(void)
{
int r;
unsigned cpu;
#ifdef VDSO_GETCPU_SYM
getcpu_f f = (getcpu_f)vdso_func;
if (f) {
r = f(&cpu, 0, 0);
if (!r) return cpu;
if (r != -ENOSYS) return __syscall_ret(r);
}
#endif
r = __syscall(SYS_getcpu, &cpu, 0, 0);
if (!r) return cpu;
return __syscall_ret(r);
}

View File

@ -0,0 +1,11 @@
#define _GNU_SOURCE
#include <stdlib.h>
char *getenv(const char *name);
char *secure_getenv(const char *name)
{
// NOTE: Let's assume we're not running as SUID
return getenv(name);
/* return libc.secure ? NULL : getenv(name); */
}

View File

@ -0,0 +1,15 @@
#pragma once
#include <sys/syscall.h>
#include <syscall_arch.h>
typedef long syscall_arg_t;
__attribute__((visibility("hidden")))
long __syscall_ret(unsigned long);
__attribute__((visibility("hidden")))
long __syscall(syscall_arg_t, ...);
__attribute__((visibility("hidden")))
void *__vdsosym(const char *, const char *);

View File

@ -0,0 +1,11 @@
#include <errno.h>
#include "syscall.h"
long __syscall_ret(unsigned long r)
{
if (r > -4096UL) {
errno = -r;
return -1;
}
return r;
}

View File

@ -0,0 +1,12 @@
#include <time.h>
int clock_gettime(clockid_t, struct timespec *);
/* There is no other implemented value than TIME_UTC; all other values
* are considered erroneous. */
int timespec_get(struct timespec * ts, int base)
{
if (base != TIME_UTC) return 0;
int ret = clock_gettime(CLOCK_REALTIME, ts);
return ret < 0 ? 0 : base;
}

View File

@ -0,0 +1,38 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <errno.h>
#include "syscall.h"
#include <syscall.h>
int utimensat(int fd, const char *path, const struct timespec times[2], int flags)
{
int r = __syscall(SYS_utimensat, fd, path, times, flags);
#ifdef SYS_futimesat
if (r != -ENOSYS || flags) return __syscall_ret(r);
struct timeval *tv = 0, tmp[2];
if (times) {
int i;
tv = tmp;
for (i=0; i<2; i++) {
if (times[i].tv_nsec >= 1000000000ULL) {
if (times[i].tv_nsec == UTIME_NOW &&
times[1-i].tv_nsec == UTIME_NOW) {
tv = 0;
break;
}
if (times[i].tv_nsec == UTIME_OMIT)
return __syscall_ret(-ENOSYS);
return __syscall_ret(-EINVAL);
}
tmp[i].tv_sec = times[i].tv_sec;
tmp[i].tv_usec = times[i].tv_nsec / 1000;
}
}
r = __syscall(SYS_futimesat, fd, path, tv);
if (r != -ENOSYS || fd != AT_FDCWD) return __syscall_ret(r);
r = __syscall(SYS_utimes, path, tv);
#endif
return __syscall_ret(r);
}

View File

@ -0,0 +1,15 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
int vasprintf(char **s, const char *fmt, va_list ap)
{
va_list ap2;
va_copy(ap2, ap);
int l = vsnprintf(0, 0, fmt, ap2);
va_end(ap2);
if (l<0 || !(*s=malloc(l+1U))) return -1;
return vsnprintf(*s, l+1U, fmt, ap);
}

View File

@ -0,0 +1,105 @@
#include <elf.h>
#include <link.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sys/auxv.h>
#include "syscall.h"
#ifdef VDSO_USEFUL
#if ULONG_MAX == 0xffffffff
typedef Elf32_Ehdr Ehdr;
typedef Elf32_Phdr Phdr;
typedef Elf32_Sym Sym;
typedef Elf32_Verdef Verdef;
typedef Elf32_Verdaux Verdaux;
#else
typedef Elf64_Ehdr Ehdr;
typedef Elf64_Phdr Phdr;
typedef Elf64_Sym Sym;
typedef Elf64_Verdef Verdef;
typedef Elf64_Verdaux Verdaux;
#endif
static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
{
vsym &= 0x7fff;
for (;;) {
if (!(def->vd_flags & VER_FLG_BASE)
&& (def->vd_ndx & 0x7fff) == vsym)
break;
if (def->vd_next == 0)
return 0;
def = (Verdef *)((char *)def + def->vd_next);
}
Verdaux *aux = (Verdaux *)((char *)def + def->vd_aux);
return !strcmp(vername, strings + aux->vda_name);
}
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
extern char** environ;
static Ehdr *eh = NULL;
void *__vdsosym(const char *vername, const char *name);
// We don't have libc struct available here. Compute aux vector manually.
__attribute__((constructor)) static void auxv_init()
{
size_t i, *auxv;
for (i=0; environ[i]; i++);
auxv = (void *)(environ+i+1);
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
if (!auxv[i]) return;
if (!auxv[i+1]) return;
eh = (void *)auxv[i+1];
}
void *__vdsosym(const char *vername, const char *name)
{
size_t i;
if (!eh) return 0;
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
size_t *dynv=0, base=-1;
for (i=0; i<eh->e_phnum; i++, ph=(void *)((char *)ph+eh->e_phentsize)) {
if (ph->p_type == PT_LOAD)
base = (size_t)eh + ph->p_offset - ph->p_vaddr;
else if (ph->p_type == PT_DYNAMIC)
dynv = (void *)((char *)eh + ph->p_offset);
}
if (!dynv || base==(size_t)-1) return 0;
char *strings = 0;
Sym *syms = 0;
Elf_Symndx *hashtab = 0;
uint16_t *versym = 0;
Verdef *verdef = 0;
for (i=0; dynv[i]; i+=2) {
void *p = (void *)(base + dynv[i+1]);
switch(dynv[i]) {
case DT_STRTAB: strings = p; break;
case DT_SYMTAB: syms = p; break;
case DT_HASH: hashtab = p; break;
case DT_VERSYM: versym = p; break;
case DT_VERDEF: verdef = p; break;
}
}
if (!strings || !syms || !hashtab) return 0;
if (!verdef) versym = 0;
for (i=0; i<hashtab[1]; i++) {
if (!(1<<(syms[i].st_info&0xf) & OK_TYPES)) continue;
if (!(1<<(syms[i].st_info>>4) & OK_BINDS)) continue;
if (!syms[i].st_shndx) continue;
if (strcmp(name, strings+syms[i].st_name)) continue;
if (versym && !checkver(verdef, versym[i], vername, strings))
continue;
return (void *)(base + syms[i].st_value);
}
return 0;
}
#endif

View File

@ -0,0 +1,125 @@
#include <stdint.h>
#define a_cas a_cas
static inline int a_cas(volatile int *p, int t, int s)
{
__asm__ __volatile__ (
"lock ; cmpxchg %3, %1"
: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
return t;
}
#define a_cas_p a_cas_p
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
__asm__( "lock ; cmpxchg %3, %1"
: "=a"(t), "=m"(*(void *volatile *)p)
: "a"(t), "r"(s) : "memory" );
return t;
}
#define a_swap a_swap
static inline int a_swap(volatile int *p, int v)
{
__asm__ __volatile__(
"xchg %0, %1"
: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
return v;
}
#define a_fetch_add a_fetch_add
static inline int a_fetch_add(volatile int *p, int v)
{
__asm__ __volatile__(
"lock ; xadd %0, %1"
: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
return v;
}
#define a_and a_and
static inline void a_and(volatile int *p, int v)
{
__asm__ __volatile__(
"lock ; and %1, %0"
: "=m"(*p) : "r"(v) : "memory" );
}
#define a_or a_or
static inline void a_or(volatile int *p, int v)
{
__asm__ __volatile__(
"lock ; or %1, %0"
: "=m"(*p) : "r"(v) : "memory" );
}
#define a_and_64 a_and_64
static inline void a_and_64(volatile uint64_t *p, uint64_t v)
{
__asm__ __volatile(
"lock ; and %1, %0"
: "=m"(*p) : "r"(v) : "memory" );
}
#define a_or_64 a_or_64
static inline void a_or_64(volatile uint64_t *p, uint64_t v)
{
__asm__ __volatile__(
"lock ; or %1, %0"
: "=m"(*p) : "r"(v) : "memory" );
}
#define a_inc a_inc
static inline void a_inc(volatile int *p)
{
__asm__ __volatile__(
"lock ; incl %0"
: "=m"(*p) : "m"(*p) : "memory" );
}
#define a_dec a_dec
static inline void a_dec(volatile int *p)
{
__asm__ __volatile__(
"lock ; decl %0"
: "=m"(*p) : "m"(*p) : "memory" );
}
#define a_store a_store
static inline void a_store(volatile int *p, int x)
{
__asm__ __volatile__(
"mov %1, %0 ; lock ; orl $0,(%%rsp)"
: "=m"(*p) : "r"(x) : "memory" );
}
#define a_barrier a_barrier
static inline void a_barrier()
{
__asm__ __volatile__( "" : : : "memory" );
}
#define a_spin a_spin
static inline void a_spin()
{
__asm__ __volatile__( "pause" : : : "memory" );
}
#define a_crash a_crash
static inline void a_crash()
{
__asm__ __volatile__( "hlt" : : : "memory" );
}
#define a_ctz_64 a_ctz_64
static inline int a_ctz_64(uint64_t x)
{
__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
return x;
}
#define a_clz_64 a_clz_64
static inline int a_clz_64(uint64_t x)
{
__asm__( "bsr %1,%0 ; xor $63,%0" : "=r"(x) : "r"(x) );
return x;
}

View File

@ -0,0 +1,22 @@
/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */
.global musl_glibc_longjmp
.type musl_glibc_longjmp,@function
musl_glibc_longjmp:
mov 0x30(%rdi),%r8
mov 0x8(%rdi),%r9
mov 0x38(%rdi),%rdx
ror $0x11,%r8
xor %fs:0x30,%r8 /* this ends up being the stack pointer */
ror $0x11,%r9
xor %fs:0x30,%r9
ror $0x11,%rdx
xor %fs:0x30,%rdx /* this is the instruction pointer */
mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */
mov 0x10(%rdi),%r12
mov 0x18(%rdi),%r13
mov 0x20(%rdi),%r14
mov 0x28(%rdi),%r15
mov %esi,%eax
mov %r8,%rsp
mov %r9,%rbp
jmpq *%rdx /* goto saved address without altering rsp */

View File

@ -0,0 +1,15 @@
.global __syscall
.hidden __syscall
.type __syscall,@function
__syscall:
.cfi_startproc
movq %rdi,%rax
movq %rsi,%rdi
movq %rdx,%rsi
movq %rcx,%rdx
movq %r8,%r10
movq %r9,%r8
movq 8(%rsp),%r9
syscall
ret
.cfi_endproc

View File

@ -0,0 +1,5 @@
#define VDSO_USEFUL
#define VDSO_CGT_SYM "__vdso_clock_gettime"
#define VDSO_CGT_VER "LINUX_2.6"
#define VDSO_GETCPU_SYM "__vdso_getcpu"
#define VDSO_GETCPU_VER "LINUX_2.6"

View File

@ -398,7 +398,7 @@ build_curl() {
LDFLAGS="-L${TP_LIB_DIR}" LIBS="-lcrypto -lssl -lcrypto -ldl" \
CFLAGS="-fPIC" \
./configure --prefix=$TP_INSTALL_DIR --disable-shared --enable-static \
--without-librtmp --with-ssl=${TP_INSTALL_DIR} --without-libidn2 --disable-ldap --enable-ipv6
--without-librtmp --without-libssh2 --without-nghttp2 --with-ssl=${TP_INSTALL_DIR} --without-libidn2 --disable-ldap --enable-ipv6
make -j$PARALLEL && make install
}
@ -822,4 +822,4 @@ build_aws_c_event_stream
build_aws_sdk
build_js_and_css
echo "Finihsed to build all thirdparties"
echo "Finished to build all thirdparties"