Masking glibc symbols for better portability (#4180)
* Masking glibc symbols for better portability * Remove redundant files
This commit is contained in:
@ -501,8 +501,11 @@ else()
|
||||
message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}")
|
||||
endif()
|
||||
|
||||
add_subdirectory(${SRC_DIR}/glibc-compatibility)
|
||||
|
||||
set(DORIS_LINK_LIBS ${DORIS_LINK_LIBS}
|
||||
-lrt -lbfd -liberty -lc -lm -ldl -pthread
|
||||
-lrt -l:libbfd.a -liberty -lc -lm -ldl -pthread
|
||||
glibc-compatibility
|
||||
)
|
||||
|
||||
# Set libraries for test
|
||||
|
||||
55
be/src/glibc-compatibility/CMakeLists.txt
Normal file
55
be/src/glibc-compatibility/CMakeLists.txt
Normal file
@ -0,0 +1,55 @@
|
||||
enable_language(ASM)
|
||||
include(CheckIncludeFile)
|
||||
|
||||
check_include_file("sys/random.h" HAVE_SYS_RANDOM_H)
|
||||
|
||||
if(COMPILER_CLANG)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-builtin-requires-header")
|
||||
endif()
|
||||
|
||||
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.12")
|
||||
macro(add_glob cur_list)
|
||||
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} CONFIGURE_DEPENDS ${ARGN})
|
||||
list(APPEND ${cur_list} ${__tmp})
|
||||
endmacro()
|
||||
else ()
|
||||
macro(add_glob cur_list)
|
||||
file(GLOB __tmp RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${ARGN})
|
||||
list(APPEND ${cur_list} ${__tmp})
|
||||
endmacro()
|
||||
endif ()
|
||||
|
||||
macro(add_headers_and_sources prefix common_path)
|
||||
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
|
||||
add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h)
|
||||
endmacro()
|
||||
|
||||
macro(add_headers_only prefix common_path)
|
||||
add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h)
|
||||
endmacro()
|
||||
|
||||
add_headers_and_sources(glibc_compatibility .)
|
||||
add_headers_and_sources(glibc_compatibility musl)
|
||||
|
||||
list (APPEND glibc_compatibility_sources musl/x86_64/syscall.s musl/x86_64/longjmp.s FastMemcpy.c)
|
||||
set (musl_arch_include_dir musl/x86_64)
|
||||
|
||||
list(REMOVE_ITEM glibc_compatibility_sources musl/getentropy.c)
|
||||
if(HAVE_SYS_RANDOM_H)
|
||||
list(APPEND glibc_compatibility_sources musl/getentropy.c)
|
||||
endif()
|
||||
|
||||
# Need to omit frame pointers to match the performance of glibc
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer")
|
||||
|
||||
add_library(glibc-compatibility STATIC ${glibc_compatibility_sources})
|
||||
|
||||
if (COMPILER_CLANG)
|
||||
target_compile_options(glibc-compatibility PRIVATE -Wno-unused-command-line-argument)
|
||||
elseif (COMPILER_GCC)
|
||||
target_compile_options(glibc-compatibility PRIVATE -Wno-unused-but-set-variable)
|
||||
endif ()
|
||||
|
||||
target_include_directories(glibc-compatibility PRIVATE ${musl_arch_include_dir})
|
||||
|
||||
message (STATUS "Some symbols from glibc will be replaced for compatibility")
|
||||
220
be/src/glibc-compatibility/FastMemcpy.c
Normal file
220
be/src/glibc-compatibility/FastMemcpy.c
Normal file
@ -0,0 +1,220 @@
|
||||
//=====================================================================
|
||||
//
|
||||
// FastMemcpy.c - skywind3000@163.com, 2015
|
||||
//
|
||||
// feature:
|
||||
// 50% speed up in avg. vs standard memcpy (tested in vc2012/gcc4.9)
|
||||
//
|
||||
//=====================================================================
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#if (defined(_WIN32) || defined(WIN32))
|
||||
#include <windows.h>
|
||||
#include <mmsystem.h>
|
||||
#ifdef _MSC_VER
|
||||
#pragma comment(lib, "winmm.lib")
|
||||
#endif
|
||||
#elif defined(__unix)
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#error it can only be compiled under windows or unix
|
||||
#endif
|
||||
|
||||
#include "FastMemcpy.h"
|
||||
|
||||
unsigned int gettime()
|
||||
{
|
||||
#if (defined(_WIN32) || defined(WIN32))
|
||||
return timeGetTime();
|
||||
#else
|
||||
static struct timezone tz={ 0,0 };
|
||||
struct timeval time;
|
||||
gettimeofday(&time,&tz);
|
||||
return (time.tv_sec * 1000 + time.tv_usec / 1000);
|
||||
#endif
|
||||
}
|
||||
|
||||
void sleepms(unsigned int millisec)
|
||||
{
|
||||
#if defined(_WIN32) || defined(WIN32)
|
||||
Sleep(millisec);
|
||||
#else
|
||||
usleep(millisec * 1000);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void benchmark(int dstalign, int srcalign, size_t size, int times)
|
||||
{
|
||||
char *DATA1 = (char*)malloc(size + 64);
|
||||
char *DATA2 = (char*)malloc(size + 64);
|
||||
size_t LINEAR1 = ((size_t)DATA1);
|
||||
size_t LINEAR2 = ((size_t)DATA2);
|
||||
char *ALIGN1 = (char*)(((64 - (LINEAR1 & 63)) & 63) + LINEAR1);
|
||||
char *ALIGN2 = (char*)(((64 - (LINEAR2 & 63)) & 63) + LINEAR2);
|
||||
char *dst = (dstalign)? ALIGN1 : (ALIGN1 + 1);
|
||||
char *src = (srcalign)? ALIGN2 : (ALIGN2 + 3);
|
||||
unsigned int t1, t2;
|
||||
int k;
|
||||
|
||||
sleepms(100);
|
||||
t1 = gettime();
|
||||
for (k = times; k > 0; k--) {
|
||||
memcpy(dst, src, size);
|
||||
}
|
||||
t1 = gettime() - t1;
|
||||
sleepms(100);
|
||||
t2 = gettime();
|
||||
for (k = times; k > 0; k--) {
|
||||
memcpy_fast(dst, src, size);
|
||||
}
|
||||
t2 = gettime() - t2;
|
||||
|
||||
free(DATA1);
|
||||
free(DATA2);
|
||||
|
||||
printf("result(dst %s, src %s): memcpy_fast=%dms memcpy=%d ms\n",
|
||||
dstalign? "aligned" : "unalign",
|
||||
srcalign? "aligned" : "unalign", (int)t2, (int)t1);
|
||||
}
|
||||
|
||||
|
||||
void bench(int copysize, int times)
|
||||
{
|
||||
printf("benchmark(size=%d bytes, times=%d):\n", copysize, times);
|
||||
benchmark(1, 1, copysize, times);
|
||||
benchmark(1, 0, copysize, times);
|
||||
benchmark(0, 1, copysize, times);
|
||||
benchmark(0, 0, copysize, times);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
void random_bench(int maxsize, int times)
|
||||
{
|
||||
static char A[11 * 1024 * 1024 + 2];
|
||||
static char B[11 * 1024 * 1024 + 2];
|
||||
static int random_offsets[0x10000];
|
||||
static int random_sizes[0x8000];
|
||||
unsigned int i, p1, p2;
|
||||
unsigned int t1, t2;
|
||||
for (i = 0; i < 0x10000; i++) { // generate random offsets
|
||||
random_offsets[i] = rand() % (10 * 1024 * 1024 + 1);
|
||||
}
|
||||
for (i = 0; i < 0x8000; i++) { // generate random sizes
|
||||
random_sizes[i] = 1 + rand() % maxsize;
|
||||
}
|
||||
sleepms(100);
|
||||
t1 = gettime();
|
||||
for (p1 = 0, p2 = 0, i = 0; i < times; i++) {
|
||||
int offset1 = random_offsets[(p1++) & 0xffff];
|
||||
int offset2 = random_offsets[(p1++) & 0xffff];
|
||||
int size = random_sizes[(p2++) & 0x7fff];
|
||||
memcpy(A + offset1, B + offset2, size);
|
||||
}
|
||||
t1 = gettime() - t1;
|
||||
sleepms(100);
|
||||
t2 = gettime();
|
||||
for (p1 = 0, p2 = 0, i = 0; i < times; i++) {
|
||||
int offset1 = random_offsets[(p1++) & 0xffff];
|
||||
int offset2 = random_offsets[(p1++) & 0xffff];
|
||||
int size = random_sizes[(p2++) & 0x7fff];
|
||||
memcpy_fast(A + offset1, B + offset2, size);
|
||||
}
|
||||
t2 = gettime() - t2;
|
||||
printf("benchmark random access:\n");
|
||||
printf("memcpy_fast=%dms memcpy=%dms\n\n", (int)t2, (int)t1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma comment(lib, "winmm.lib")
|
||||
#endif
|
||||
|
||||
int main(void)
|
||||
{
|
||||
bench(32, 0x1000000);
|
||||
bench(64, 0x1000000);
|
||||
bench(512, 0x800000);
|
||||
bench(1024, 0x400000);
|
||||
bench(4096, 0x80000);
|
||||
bench(8192, 0x40000);
|
||||
bench(1024 * 1024 * 1, 0x800);
|
||||
bench(1024 * 1024 * 4, 0x200);
|
||||
bench(1024 * 1024 * 8, 0x100);
|
||||
|
||||
random_bench(2048, 8000000);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
benchmark(size=32 bytes, times=16777216):
|
||||
result(dst aligned, src aligned): memcpy_fast=78ms memcpy=260 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=78ms memcpy=250 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=78ms memcpy=266 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=78ms memcpy=234 ms
|
||||
|
||||
benchmark(size=64 bytes, times=16777216):
|
||||
result(dst aligned, src aligned): memcpy_fast=109ms memcpy=281 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=109ms memcpy=328 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=109ms memcpy=343 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=93ms memcpy=344 ms
|
||||
|
||||
benchmark(size=512 bytes, times=8388608):
|
||||
result(dst aligned, src aligned): memcpy_fast=125ms memcpy=218 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=156ms memcpy=484 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=172ms memcpy=546 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=172ms memcpy=515 ms
|
||||
|
||||
benchmark(size=1024 bytes, times=4194304):
|
||||
result(dst aligned, src aligned): memcpy_fast=109ms memcpy=172 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=187ms memcpy=453 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=172ms memcpy=437 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=156ms memcpy=452 ms
|
||||
|
||||
benchmark(size=4096 bytes, times=524288):
|
||||
result(dst aligned, src aligned): memcpy_fast=62ms memcpy=78 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=109ms memcpy=202 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=94ms memcpy=203 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=110ms memcpy=218 ms
|
||||
|
||||
benchmark(size=8192 bytes, times=262144):
|
||||
result(dst aligned, src aligned): memcpy_fast=62ms memcpy=78 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=78ms memcpy=202 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=78ms memcpy=203 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=94ms memcpy=203 ms
|
||||
|
||||
benchmark(size=1048576 bytes, times=2048):
|
||||
result(dst aligned, src aligned): memcpy_fast=203ms memcpy=191 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=219ms memcpy=281 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=218ms memcpy=328 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=218ms memcpy=312 ms
|
||||
|
||||
benchmark(size=4194304 bytes, times=512):
|
||||
result(dst aligned, src aligned): memcpy_fast=312ms memcpy=406 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=296ms memcpy=421 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=312ms memcpy=468 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=297ms memcpy=452 ms
|
||||
|
||||
benchmark(size=8388608 bytes, times=256):
|
||||
result(dst aligned, src aligned): memcpy_fast=281ms memcpy=452 ms
|
||||
result(dst aligned, src unalign): memcpy_fast=280ms memcpy=468 ms
|
||||
result(dst unalign, src aligned): memcpy_fast=298ms memcpy=514 ms
|
||||
result(dst unalign, src unalign): memcpy_fast=344ms memcpy=472 ms
|
||||
|
||||
benchmark random access:
|
||||
memcpy_fast=515ms memcpy=1014ms
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
694
be/src/glibc-compatibility/FastMemcpy.h
Normal file
694
be/src/glibc-compatibility/FastMemcpy.h
Normal file
@ -0,0 +1,694 @@
|
||||
//=====================================================================
|
||||
//
|
||||
// FastMemcpy.c - skywind3000@163.com, 2015
|
||||
//
|
||||
// feature:
|
||||
// 50% speed up in avg. vs standard memcpy (tested in vc2012/gcc5.1)
|
||||
//
|
||||
//=====================================================================
|
||||
#ifndef __FAST_MEMCPY_H__
|
||||
#define __FAST_MEMCPY_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// force inline for compilers
|
||||
//---------------------------------------------------------------------
|
||||
#ifndef INLINE
|
||||
#ifdef __GNUC__
|
||||
#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
|
||||
#define INLINE __inline__ __attribute__((always_inline))
|
||||
#else
|
||||
#define INLINE __inline__
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#define INLINE __forceinline
|
||||
#elif (defined(__BORLANDC__) || defined(__WATCOMC__))
|
||||
#define INLINE __inline
|
||||
#else
|
||||
#define INLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef __attribute__((__aligned__(1))) uint16_t uint16_unaligned_t;
|
||||
typedef __attribute__((__aligned__(1))) uint32_t uint32_unaligned_t;
|
||||
typedef __attribute__((__aligned__(1))) uint64_t uint64_unaligned_t;
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// fast copy for different sizes
|
||||
//---------------------------------------------------------------------
|
||||
static INLINE void memcpy_sse2_16(void *dst, const void *src) {
|
||||
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
|
||||
}
|
||||
|
||||
static INLINE void memcpy_sse2_32(void *dst, const void *src) {
|
||||
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
|
||||
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
|
||||
}
|
||||
|
||||
static INLINE void memcpy_sse2_64(void *dst, const void *src) {
|
||||
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
|
||||
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
|
||||
__m128i m2 = _mm_loadu_si128(((const __m128i*)src) + 2);
|
||||
__m128i m3 = _mm_loadu_si128(((const __m128i*)src) + 3);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 2, m2);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 3, m3);
|
||||
}
|
||||
|
||||
static INLINE void memcpy_sse2_128(void *dst, const void *src) {
|
||||
__m128i m0 = _mm_loadu_si128(((const __m128i*)src) + 0);
|
||||
__m128i m1 = _mm_loadu_si128(((const __m128i*)src) + 1);
|
||||
__m128i m2 = _mm_loadu_si128(((const __m128i*)src) + 2);
|
||||
__m128i m3 = _mm_loadu_si128(((const __m128i*)src) + 3);
|
||||
__m128i m4 = _mm_loadu_si128(((const __m128i*)src) + 4);
|
||||
__m128i m5 = _mm_loadu_si128(((const __m128i*)src) + 5);
|
||||
__m128i m6 = _mm_loadu_si128(((const __m128i*)src) + 6);
|
||||
__m128i m7 = _mm_loadu_si128(((const __m128i*)src) + 7);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 0, m0);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 1, m1);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 2, m2);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 3, m3);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 4, m4);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 5, m5);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 6, m6);
|
||||
_mm_storeu_si128(((__m128i*)dst) + 7, m7);
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// tiny memory copy with jump table optimized
|
||||
//---------------------------------------------------------------------
|
||||
/// Attribute is used to avoid an error with undefined behaviour sanitizer
|
||||
/// ../contrib/FastMemcpy/FastMemcpy.h:91:56: runtime error: applying zero offset to null pointer
|
||||
/// Found by 01307_orc_output_format.sh, cause - ORCBlockInputFormat and external ORC library.
|
||||
__attribute__((__no_sanitize__("undefined"))) static INLINE void *memcpy_tiny(void *dst, const void *src, size_t size) {
|
||||
unsigned char *dd = ((unsigned char*)dst) + size;
|
||||
const unsigned char *ss = ((const unsigned char*)src) + size;
|
||||
|
||||
switch (size) {
|
||||
case 64:
|
||||
memcpy_sse2_64(dd - 64, ss - 64);
|
||||
case 0:
|
||||
break;
|
||||
|
||||
case 65:
|
||||
memcpy_sse2_64(dd - 65, ss - 65);
|
||||
case 1:
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 66:
|
||||
memcpy_sse2_64(dd - 66, ss - 66);
|
||||
case 2:
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 67:
|
||||
memcpy_sse2_64(dd - 67, ss - 67);
|
||||
case 3:
|
||||
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 68:
|
||||
memcpy_sse2_64(dd - 68, ss - 68);
|
||||
case 4:
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 69:
|
||||
memcpy_sse2_64(dd - 69, ss - 69);
|
||||
case 5:
|
||||
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 70:
|
||||
memcpy_sse2_64(dd - 70, ss - 70);
|
||||
case 6:
|
||||
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 71:
|
||||
memcpy_sse2_64(dd - 71, ss - 71);
|
||||
case 7:
|
||||
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 72:
|
||||
memcpy_sse2_64(dd - 72, ss - 72);
|
||||
case 8:
|
||||
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
|
||||
break;
|
||||
|
||||
case 73:
|
||||
memcpy_sse2_64(dd - 73, ss - 73);
|
||||
case 9:
|
||||
*((uint64_unaligned_t*)(dd - 9)) = *((uint64_unaligned_t*)(ss - 9));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 74:
|
||||
memcpy_sse2_64(dd - 74, ss - 74);
|
||||
case 10:
|
||||
*((uint64_unaligned_t*)(dd - 10)) = *((uint64_unaligned_t*)(ss - 10));
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 75:
|
||||
memcpy_sse2_64(dd - 75, ss - 75);
|
||||
case 11:
|
||||
*((uint64_unaligned_t*)(dd - 11)) = *((uint64_unaligned_t*)(ss - 11));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 76:
|
||||
memcpy_sse2_64(dd - 76, ss - 76);
|
||||
case 12:
|
||||
*((uint64_unaligned_t*)(dd - 12)) = *((uint64_unaligned_t*)(ss - 12));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 77:
|
||||
memcpy_sse2_64(dd - 77, ss - 77);
|
||||
case 13:
|
||||
*((uint64_unaligned_t*)(dd - 13)) = *((uint64_unaligned_t*)(ss - 13));
|
||||
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 78:
|
||||
memcpy_sse2_64(dd - 78, ss - 78);
|
||||
case 14:
|
||||
*((uint64_unaligned_t*)(dd - 14)) = *((uint64_unaligned_t*)(ss - 14));
|
||||
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
|
||||
break;
|
||||
|
||||
case 79:
|
||||
memcpy_sse2_64(dd - 79, ss - 79);
|
||||
case 15:
|
||||
*((uint64_unaligned_t*)(dd - 15)) = *((uint64_unaligned_t*)(ss - 15));
|
||||
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
|
||||
break;
|
||||
|
||||
case 80:
|
||||
memcpy_sse2_64(dd - 80, ss - 80);
|
||||
case 16:
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 81:
|
||||
memcpy_sse2_64(dd - 81, ss - 81);
|
||||
case 17:
|
||||
memcpy_sse2_16(dd - 17, ss - 17);
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 82:
|
||||
memcpy_sse2_64(dd - 82, ss - 82);
|
||||
case 18:
|
||||
memcpy_sse2_16(dd - 18, ss - 18);
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 83:
|
||||
memcpy_sse2_64(dd - 83, ss - 83);
|
||||
case 19:
|
||||
memcpy_sse2_16(dd - 19, ss - 19);
|
||||
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 84:
|
||||
memcpy_sse2_64(dd - 84, ss - 84);
|
||||
case 20:
|
||||
memcpy_sse2_16(dd - 20, ss - 20);
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 85:
|
||||
memcpy_sse2_64(dd - 85, ss - 85);
|
||||
case 21:
|
||||
memcpy_sse2_16(dd - 21, ss - 21);
|
||||
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 86:
|
||||
memcpy_sse2_64(dd - 86, ss - 86);
|
||||
case 22:
|
||||
memcpy_sse2_16(dd - 22, ss - 22);
|
||||
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 87:
|
||||
memcpy_sse2_64(dd - 87, ss - 87);
|
||||
case 23:
|
||||
memcpy_sse2_16(dd - 23, ss - 23);
|
||||
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 88:
|
||||
memcpy_sse2_64(dd - 88, ss - 88);
|
||||
case 24:
|
||||
memcpy_sse2_16(dd - 24, ss - 24);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 89:
|
||||
memcpy_sse2_64(dd - 89, ss - 89);
|
||||
case 25:
|
||||
memcpy_sse2_16(dd - 25, ss - 25);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 90:
|
||||
memcpy_sse2_64(dd - 90, ss - 90);
|
||||
case 26:
|
||||
memcpy_sse2_16(dd - 26, ss - 26);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 91:
|
||||
memcpy_sse2_64(dd - 91, ss - 91);
|
||||
case 27:
|
||||
memcpy_sse2_16(dd - 27, ss - 27);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 92:
|
||||
memcpy_sse2_64(dd - 92, ss - 92);
|
||||
case 28:
|
||||
memcpy_sse2_16(dd - 28, ss - 28);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 93:
|
||||
memcpy_sse2_64(dd - 93, ss - 93);
|
||||
case 29:
|
||||
memcpy_sse2_16(dd - 29, ss - 29);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 94:
|
||||
memcpy_sse2_64(dd - 94, ss - 94);
|
||||
case 30:
|
||||
memcpy_sse2_16(dd - 30, ss - 30);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 95:
|
||||
memcpy_sse2_64(dd - 95, ss - 95);
|
||||
case 31:
|
||||
memcpy_sse2_16(dd - 31, ss - 31);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 96:
|
||||
memcpy_sse2_64(dd - 96, ss - 96);
|
||||
case 32:
|
||||
memcpy_sse2_32(dd - 32, ss - 32);
|
||||
break;
|
||||
|
||||
case 97:
|
||||
memcpy_sse2_64(dd - 97, ss - 97);
|
||||
case 33:
|
||||
memcpy_sse2_32(dd - 33, ss - 33);
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 98:
|
||||
memcpy_sse2_64(dd - 98, ss - 98);
|
||||
case 34:
|
||||
memcpy_sse2_32(dd - 34, ss - 34);
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 99:
|
||||
memcpy_sse2_64(dd - 99, ss - 99);
|
||||
case 35:
|
||||
memcpy_sse2_32(dd - 35, ss - 35);
|
||||
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 100:
|
||||
memcpy_sse2_64(dd - 100, ss - 100);
|
||||
case 36:
|
||||
memcpy_sse2_32(dd - 36, ss - 36);
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 101:
|
||||
memcpy_sse2_64(dd - 101, ss - 101);
|
||||
case 37:
|
||||
memcpy_sse2_32(dd - 37, ss - 37);
|
||||
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 102:
|
||||
memcpy_sse2_64(dd - 102, ss - 102);
|
||||
case 38:
|
||||
memcpy_sse2_32(dd - 38, ss - 38);
|
||||
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 103:
|
||||
memcpy_sse2_64(dd - 103, ss - 103);
|
||||
case 39:
|
||||
memcpy_sse2_32(dd - 39, ss - 39);
|
||||
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 104:
|
||||
memcpy_sse2_64(dd - 104, ss - 104);
|
||||
case 40:
|
||||
memcpy_sse2_32(dd - 40, ss - 40);
|
||||
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
|
||||
break;
|
||||
|
||||
case 105:
|
||||
memcpy_sse2_64(dd - 105, ss - 105);
|
||||
case 41:
|
||||
memcpy_sse2_32(dd - 41, ss - 41);
|
||||
*((uint64_unaligned_t*)(dd - 9)) = *((uint64_unaligned_t*)(ss - 9));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 106:
|
||||
memcpy_sse2_64(dd - 106, ss - 106);
|
||||
case 42:
|
||||
memcpy_sse2_32(dd - 42, ss - 42);
|
||||
*((uint64_unaligned_t*)(dd - 10)) = *((uint64_unaligned_t*)(ss - 10));
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 107:
|
||||
memcpy_sse2_64(dd - 107, ss - 107);
|
||||
case 43:
|
||||
memcpy_sse2_32(dd - 43, ss - 43);
|
||||
*((uint64_unaligned_t*)(dd - 11)) = *((uint64_unaligned_t*)(ss - 11));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 108:
|
||||
memcpy_sse2_64(dd - 108, ss - 108);
|
||||
case 44:
|
||||
memcpy_sse2_32(dd - 44, ss - 44);
|
||||
*((uint64_unaligned_t*)(dd - 12)) = *((uint64_unaligned_t*)(ss - 12));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 109:
|
||||
memcpy_sse2_64(dd - 109, ss - 109);
|
||||
case 45:
|
||||
memcpy_sse2_32(dd - 45, ss - 45);
|
||||
*((uint64_unaligned_t*)(dd - 13)) = *((uint64_unaligned_t*)(ss - 13));
|
||||
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 110:
|
||||
memcpy_sse2_64(dd - 110, ss - 110);
|
||||
case 46:
|
||||
memcpy_sse2_32(dd - 46, ss - 46);
|
||||
*((uint64_unaligned_t*)(dd - 14)) = *((uint64_unaligned_t*)(ss - 14));
|
||||
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
|
||||
break;
|
||||
|
||||
case 111:
|
||||
memcpy_sse2_64(dd - 111, ss - 111);
|
||||
case 47:
|
||||
memcpy_sse2_32(dd - 47, ss - 47);
|
||||
*((uint64_unaligned_t*)(dd - 15)) = *((uint64_unaligned_t*)(ss - 15));
|
||||
*((uint64_unaligned_t*)(dd - 8)) = *((uint64_unaligned_t*)(ss - 8));
|
||||
break;
|
||||
|
||||
case 112:
|
||||
memcpy_sse2_64(dd - 112, ss - 112);
|
||||
case 48:
|
||||
memcpy_sse2_32(dd - 48, ss - 48);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 113:
|
||||
memcpy_sse2_64(dd - 113, ss - 113);
|
||||
case 49:
|
||||
memcpy_sse2_32(dd - 49, ss - 49);
|
||||
memcpy_sse2_16(dd - 17, ss - 17);
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 114:
|
||||
memcpy_sse2_64(dd - 114, ss - 114);
|
||||
case 50:
|
||||
memcpy_sse2_32(dd - 50, ss - 50);
|
||||
memcpy_sse2_16(dd - 18, ss - 18);
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 115:
|
||||
memcpy_sse2_64(dd - 115, ss - 115);
|
||||
case 51:
|
||||
memcpy_sse2_32(dd - 51, ss - 51);
|
||||
memcpy_sse2_16(dd - 19, ss - 19);
|
||||
*((uint16_unaligned_t*)(dd - 3)) = *((uint16_unaligned_t*)(ss - 3));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 116:
|
||||
memcpy_sse2_64(dd - 116, ss - 116);
|
||||
case 52:
|
||||
memcpy_sse2_32(dd - 52, ss - 52);
|
||||
memcpy_sse2_16(dd - 20, ss - 20);
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 117:
|
||||
memcpy_sse2_64(dd - 117, ss - 117);
|
||||
case 53:
|
||||
memcpy_sse2_32(dd - 53, ss - 53);
|
||||
memcpy_sse2_16(dd - 21, ss - 21);
|
||||
*((uint32_unaligned_t*)(dd - 5)) = *((uint32_unaligned_t*)(ss - 5));
|
||||
dd[-1] = ss[-1];
|
||||
break;
|
||||
|
||||
case 118:
|
||||
memcpy_sse2_64(dd - 118, ss - 118);
|
||||
case 54:
|
||||
memcpy_sse2_32(dd - 54, ss - 54);
|
||||
memcpy_sse2_16(dd - 22, ss - 22);
|
||||
*((uint32_unaligned_t*)(dd - 6)) = *((uint32_unaligned_t*)(ss - 6));
|
||||
*((uint16_unaligned_t*)(dd - 2)) = *((uint16_unaligned_t*)(ss - 2));
|
||||
break;
|
||||
|
||||
case 119:
|
||||
memcpy_sse2_64(dd - 119, ss - 119);
|
||||
case 55:
|
||||
memcpy_sse2_32(dd - 55, ss - 55);
|
||||
memcpy_sse2_16(dd - 23, ss - 23);
|
||||
*((uint32_unaligned_t*)(dd - 7)) = *((uint32_unaligned_t*)(ss - 7));
|
||||
*((uint32_unaligned_t*)(dd - 4)) = *((uint32_unaligned_t*)(ss - 4));
|
||||
break;
|
||||
|
||||
case 120:
|
||||
memcpy_sse2_64(dd - 120, ss - 120);
|
||||
case 56:
|
||||
memcpy_sse2_32(dd - 56, ss - 56);
|
||||
memcpy_sse2_16(dd - 24, ss - 24);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 121:
|
||||
memcpy_sse2_64(dd - 121, ss - 121);
|
||||
case 57:
|
||||
memcpy_sse2_32(dd - 57, ss - 57);
|
||||
memcpy_sse2_16(dd - 25, ss - 25);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 122:
|
||||
memcpy_sse2_64(dd - 122, ss - 122);
|
||||
case 58:
|
||||
memcpy_sse2_32(dd - 58, ss - 58);
|
||||
memcpy_sse2_16(dd - 26, ss - 26);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 123:
|
||||
memcpy_sse2_64(dd - 123, ss - 123);
|
||||
case 59:
|
||||
memcpy_sse2_32(dd - 59, ss - 59);
|
||||
memcpy_sse2_16(dd - 27, ss - 27);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 124:
|
||||
memcpy_sse2_64(dd - 124, ss - 124);
|
||||
case 60:
|
||||
memcpy_sse2_32(dd - 60, ss - 60);
|
||||
memcpy_sse2_16(dd - 28, ss - 28);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 125:
|
||||
memcpy_sse2_64(dd - 125, ss - 125);
|
||||
case 61:
|
||||
memcpy_sse2_32(dd - 61, ss - 61);
|
||||
memcpy_sse2_16(dd - 29, ss - 29);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 126:
|
||||
memcpy_sse2_64(dd - 126, ss - 126);
|
||||
case 62:
|
||||
memcpy_sse2_32(dd - 62, ss - 62);
|
||||
memcpy_sse2_16(dd - 30, ss - 30);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 127:
|
||||
memcpy_sse2_64(dd - 127, ss - 127);
|
||||
case 63:
|
||||
memcpy_sse2_32(dd - 63, ss - 63);
|
||||
memcpy_sse2_16(dd - 31, ss - 31);
|
||||
memcpy_sse2_16(dd - 16, ss - 16);
|
||||
break;
|
||||
|
||||
case 128:
|
||||
memcpy_sse2_128(dd - 128, ss - 128);
|
||||
break;
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// main routine
|
||||
//---------------------------------------------------------------------
|
||||
static void* memcpy_fast(void *destination, const void *source, size_t size)
|
||||
{
|
||||
unsigned char *dst = (unsigned char*)destination;
|
||||
const unsigned char *src = (const unsigned char*)source;
|
||||
static size_t cachesize = 0x200000; // L2-cache size
|
||||
size_t padding;
|
||||
|
||||
// small memory copy
|
||||
if (size <= 128) {
|
||||
return memcpy_tiny(dst, src, size);
|
||||
}
|
||||
|
||||
// align destination to 16 bytes boundary
|
||||
padding = (16 - (((size_t)dst) & 15)) & 15;
|
||||
|
||||
if (padding > 0) {
|
||||
__m128i head = _mm_loadu_si128((const __m128i*)src);
|
||||
_mm_storeu_si128((__m128i*)dst, head);
|
||||
dst += padding;
|
||||
src += padding;
|
||||
size -= padding;
|
||||
}
|
||||
|
||||
// medium size copy
|
||||
if (size <= cachesize) {
|
||||
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
for (; size >= 128; size -= 128) {
|
||||
c0 = _mm_loadu_si128(((const __m128i*)src) + 0);
|
||||
c1 = _mm_loadu_si128(((const __m128i*)src) + 1);
|
||||
c2 = _mm_loadu_si128(((const __m128i*)src) + 2);
|
||||
c3 = _mm_loadu_si128(((const __m128i*)src) + 3);
|
||||
c4 = _mm_loadu_si128(((const __m128i*)src) + 4);
|
||||
c5 = _mm_loadu_si128(((const __m128i*)src) + 5);
|
||||
c6 = _mm_loadu_si128(((const __m128i*)src) + 6);
|
||||
c7 = _mm_loadu_si128(((const __m128i*)src) + 7);
|
||||
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
|
||||
src += 128;
|
||||
_mm_store_si128((((__m128i*)dst) + 0), c0);
|
||||
_mm_store_si128((((__m128i*)dst) + 1), c1);
|
||||
_mm_store_si128((((__m128i*)dst) + 2), c2);
|
||||
_mm_store_si128((((__m128i*)dst) + 3), c3);
|
||||
_mm_store_si128((((__m128i*)dst) + 4), c4);
|
||||
_mm_store_si128((((__m128i*)dst) + 5), c5);
|
||||
_mm_store_si128((((__m128i*)dst) + 6), c6);
|
||||
_mm_store_si128((((__m128i*)dst) + 7), c7);
|
||||
dst += 128;
|
||||
}
|
||||
}
|
||||
else { // big memory copy
|
||||
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
_mm_prefetch((const char*)(src), _MM_HINT_NTA);
|
||||
|
||||
if ((((size_t)src) & 15) == 0) { // source aligned
|
||||
for (; size >= 128; size -= 128) {
|
||||
c0 = _mm_load_si128(((const __m128i*)src) + 0);
|
||||
c1 = _mm_load_si128(((const __m128i*)src) + 1);
|
||||
c2 = _mm_load_si128(((const __m128i*)src) + 2);
|
||||
c3 = _mm_load_si128(((const __m128i*)src) + 3);
|
||||
c4 = _mm_load_si128(((const __m128i*)src) + 4);
|
||||
c5 = _mm_load_si128(((const __m128i*)src) + 5);
|
||||
c6 = _mm_load_si128(((const __m128i*)src) + 6);
|
||||
c7 = _mm_load_si128(((const __m128i*)src) + 7);
|
||||
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
|
||||
src += 128;
|
||||
_mm_stream_si128((((__m128i*)dst) + 0), c0);
|
||||
_mm_stream_si128((((__m128i*)dst) + 1), c1);
|
||||
_mm_stream_si128((((__m128i*)dst) + 2), c2);
|
||||
_mm_stream_si128((((__m128i*)dst) + 3), c3);
|
||||
_mm_stream_si128((((__m128i*)dst) + 4), c4);
|
||||
_mm_stream_si128((((__m128i*)dst) + 5), c5);
|
||||
_mm_stream_si128((((__m128i*)dst) + 6), c6);
|
||||
_mm_stream_si128((((__m128i*)dst) + 7), c7);
|
||||
dst += 128;
|
||||
}
|
||||
}
|
||||
else { // source unaligned
|
||||
for (; size >= 128; size -= 128) {
|
||||
c0 = _mm_loadu_si128(((const __m128i*)src) + 0);
|
||||
c1 = _mm_loadu_si128(((const __m128i*)src) + 1);
|
||||
c2 = _mm_loadu_si128(((const __m128i*)src) + 2);
|
||||
c3 = _mm_loadu_si128(((const __m128i*)src) + 3);
|
||||
c4 = _mm_loadu_si128(((const __m128i*)src) + 4);
|
||||
c5 = _mm_loadu_si128(((const __m128i*)src) + 5);
|
||||
c6 = _mm_loadu_si128(((const __m128i*)src) + 6);
|
||||
c7 = _mm_loadu_si128(((const __m128i*)src) + 7);
|
||||
_mm_prefetch((const char*)(src + 256), _MM_HINT_NTA);
|
||||
src += 128;
|
||||
_mm_stream_si128((((__m128i*)dst) + 0), c0);
|
||||
_mm_stream_si128((((__m128i*)dst) + 1), c1);
|
||||
_mm_stream_si128((((__m128i*)dst) + 2), c2);
|
||||
_mm_stream_si128((((__m128i*)dst) + 3), c3);
|
||||
_mm_stream_si128((((__m128i*)dst) + 4), c4);
|
||||
_mm_stream_si128((((__m128i*)dst) + 5), c5);
|
||||
_mm_stream_si128((((__m128i*)dst) + 6), c6);
|
||||
_mm_stream_si128((((__m128i*)dst) + 7), c7);
|
||||
dst += 128;
|
||||
}
|
||||
}
|
||||
_mm_sfence();
|
||||
}
|
||||
|
||||
memcpy_tiny(dst, src, size);
|
||||
|
||||
return destination;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
203
be/src/glibc-compatibility/LICENSE_ClickHouse
Normal file
203
be/src/glibc-compatibility/LICENSE_ClickHouse
Normal file
@ -0,0 +1,203 @@
|
||||
Copyright 2016-2020 Yandex LLC
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2016-2020 Yandex LLC
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
22
be/src/glibc-compatibility/LICENSE_FastMemcpy
Normal file
22
be/src/glibc-compatibility/LICENSE_FastMemcpy
Normal file
@ -0,0 +1,22 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Linwei
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
186
be/src/glibc-compatibility/glibc-compatibility.c
Normal file
186
be/src/glibc-compatibility/glibc-compatibility.c
Normal file
@ -0,0 +1,186 @@
|
||||
/** Allows to build programs with libc 2.27 and run on systems with at least libc 2.4,
|
||||
* such as Ubuntu Hardy or CentOS 5.
|
||||
*
|
||||
* Also look at http://www.lightofdawn.org/wiki/wiki.cgi/NewAppsOnOldGlibc
|
||||
*/
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
size_t __pthread_get_minstack(const pthread_attr_t * attr)
|
||||
{
|
||||
return 1048576; /// This is a guess. Don't sure it is correct.
|
||||
}
|
||||
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
long int syscall(long int __sysno, ...) __THROW;
|
||||
|
||||
int __gai_sigqueue(int sig, const union sigval val, pid_t caller_pid)
|
||||
{
|
||||
siginfo_t info;
|
||||
|
||||
memset(&info, 0, sizeof(siginfo_t));
|
||||
info.si_signo = sig;
|
||||
info.si_code = SI_ASYNCNL;
|
||||
info.si_pid = caller_pid;
|
||||
info.si_uid = getuid();
|
||||
info.si_value = val;
|
||||
|
||||
return syscall(__NR_rt_sigqueueinfo, info.si_pid, sig, &info);
|
||||
}
|
||||
|
||||
|
||||
#include <sys/select.h>
|
||||
#include <stdlib.h>
|
||||
#include <features.h>
|
||||
|
||||
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16)
|
||||
long int __fdelt_chk(long int d)
|
||||
{
|
||||
if (d < 0)
|
||||
abort();
|
||||
#else
|
||||
unsigned long int __fdelt_chk(unsigned long int d)
|
||||
{
|
||||
#endif
|
||||
if (d >= FD_SETSIZE)
|
||||
abort();
|
||||
return d / __NFDBITS;
|
||||
}
|
||||
|
||||
#include <sys/poll.h>
|
||||
#include <stddef.h>
|
||||
|
||||
int __poll_chk(struct pollfd * fds, nfds_t nfds, int timeout, size_t fdslen)
|
||||
{
|
||||
if (fdslen / sizeof(*fds) < nfds)
|
||||
abort();
|
||||
return poll(fds, nfds, timeout);
|
||||
}
|
||||
|
||||
#include <setjmp.h>
|
||||
|
||||
void musl_glibc_longjmp(jmp_buf env, int val);
|
||||
|
||||
/// NOTE This disables some of FORTIFY_SOURCE functionality.
|
||||
|
||||
void __longjmp_chk(jmp_buf env, int val)
|
||||
{
|
||||
musl_glibc_longjmp(env, val);
|
||||
}
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
int vasprintf(char **s, const char *fmt, va_list ap);
|
||||
|
||||
int __vasprintf_chk(char **s, int unused, const char *fmt, va_list ap)
|
||||
{
|
||||
return vasprintf(s, fmt, ap);
|
||||
}
|
||||
|
||||
int __asprintf_chk(char **result_ptr, int unused, const char *format, ...)
|
||||
{
|
||||
int ret;
|
||||
va_list ap;
|
||||
va_start (ap, format);
|
||||
ret = vasprintf(result_ptr, format, ap);
|
||||
va_end (ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int vdprintf(int fd, const char *format, va_list ap);
|
||||
|
||||
int __dprintf_chk (int d, int unused, const char *format, ...)
|
||||
{
|
||||
int ret;
|
||||
va_list ap;
|
||||
va_start (ap, format);
|
||||
ret = vdprintf(d, format, ap);
|
||||
va_end (ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t fread(void *ptr, size_t size, size_t nmemb, void *stream);
|
||||
|
||||
size_t __fread_chk(void *ptr, size_t unused, size_t size, size_t nmemb, void *stream)
|
||||
{
|
||||
return fread(ptr, size, nmemb, stream);
|
||||
}
|
||||
|
||||
int vsscanf(const char *str, const char *format, va_list ap);
|
||||
|
||||
int __isoc99_vsscanf(const char *str, const char *format, va_list ap)
|
||||
{
|
||||
return vsscanf(str, format, ap);
|
||||
}
|
||||
|
||||
int sscanf(const char *restrict s, const char *restrict fmt, ...)
|
||||
{
|
||||
int ret;
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
ret = vsscanf(s, fmt, ap);
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __isoc99_sscanf(const char *str, const char *format, ...) __attribute__((weak, nonnull, nothrow, alias("sscanf")));
|
||||
|
||||
int open(const char *path, int oflag);
|
||||
|
||||
int __open_2(const char *path, int oflag)
|
||||
{
|
||||
return open(path, oflag);
|
||||
}
|
||||
|
||||
|
||||
/// No-ops.
|
||||
int pthread_setname_np(pthread_t thread, const char *name) { return 0; }
|
||||
int pthread_getname_np(pthread_t thread, char *name, size_t len) { name[0] = '\0'; return 0; };
|
||||
|
||||
|
||||
#define SHMDIR "/dev/shm/"
|
||||
const char * __shm_directory(size_t * len)
|
||||
{
|
||||
*len = sizeof(SHMDIR) - 1;
|
||||
return SHMDIR;
|
||||
}
|
||||
|
||||
|
||||
/// https://boringssl.googlesource.com/boringssl/+/ad1907fe73334d6c696c8539646c21b11178f20f%5E!/#F0
|
||||
/* Copyright (c) 2015, Google Inc.
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
void __attribute__((__weak__)) explicit_bzero(void * buf, size_t len)
|
||||
{
|
||||
memset(buf, 0, len);
|
||||
__asm__ __volatile__("" :: "r"(buf) : "memory");
|
||||
}
|
||||
|
||||
void __explicit_bzero_chk(void * buf, size_t len, size_t unused)
|
||||
{
|
||||
return explicit_bzero(buf, len);
|
||||
}
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
6
be/src/glibc-compatibility/memcpy_wrapper.c
Normal file
6
be/src/glibc-compatibility/memcpy_wrapper.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "FastMemcpy.h"
|
||||
|
||||
void * memcpy(void * __restrict destination, const void * __restrict source, size_t size)
|
||||
{
|
||||
return memcpy_fast(destination, source, size);
|
||||
}
|
||||
163
be/src/glibc-compatibility/musl/COPYRIGHT
Normal file
163
be/src/glibc-compatibility/musl/COPYRIGHT
Normal file
@ -0,0 +1,163 @@
|
||||
musl as a whole is licensed under the following standard MIT license:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Copyright © 2005-2014 Rich Felker, et al.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Authors/contributors include:
|
||||
|
||||
Alex Dowad
|
||||
Alexander Monakov
|
||||
Anthony G. Basile
|
||||
Arvid Picciani
|
||||
Bobby Bingham
|
||||
Boris Brezillon
|
||||
Brent Cook
|
||||
Chris Spiegel
|
||||
Clément Vasseur
|
||||
Daniel Micay
|
||||
Denys Vlasenko
|
||||
Emil Renner Berthing
|
||||
Felix Fietkau
|
||||
Felix Janda
|
||||
Gianluca Anzolin
|
||||
Hauke Mehrtens
|
||||
Hiltjo Posthuma
|
||||
Isaac Dunham
|
||||
Jaydeep Patil
|
||||
Jens Gustedt
|
||||
Jeremy Huntwork
|
||||
Jo-Philipp Wich
|
||||
Joakim Sindholt
|
||||
John Spencer
|
||||
Josiah Worcester
|
||||
Justin Cormack
|
||||
Khem Raj
|
||||
Kylie McClain
|
||||
Luca Barbato
|
||||
Luka Perkov
|
||||
M Farkas-Dyck (Strake)
|
||||
Mahesh Bodapati
|
||||
Michael Forney
|
||||
Natanael Copa
|
||||
Nicholas J. Kain
|
||||
orc
|
||||
Pascal Cuoq
|
||||
Petr Hosek
|
||||
Pierre Carrier
|
||||
Rich Felker
|
||||
Richard Pennington
|
||||
Shiz
|
||||
sin
|
||||
Solar Designer
|
||||
Stefan Kristiansson
|
||||
Szabolcs Nagy
|
||||
Timo Teräs
|
||||
Trutz Behn
|
||||
Valentin Ochs
|
||||
William Haddon
|
||||
|
||||
Portions of this software are derived from third-party works licensed
|
||||
under terms compatible with the above MIT license:
|
||||
|
||||
The TRE regular expression implementation (src/regex/reg* and
|
||||
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
|
||||
under a 2-clause BSD license (license text in the source files). The
|
||||
included version has been heavily modified by Rich Felker in 2012, in
|
||||
the interests of size, simplicity, and namespace cleanliness.
|
||||
|
||||
Much of the math library code (src/math/* and src/complex/*) is
|
||||
Copyright © 1993,2004 Sun Microsystems or
|
||||
Copyright © 2003-2011 David Schultz or
|
||||
Copyright © 2003-2009 Steven G. Kargl or
|
||||
Copyright © 2003-2009 Bruce D. Evans or
|
||||
Copyright © 2008 Stephen L. Moshier
|
||||
and labelled as such in comments in the individual source files. All
|
||||
have been licensed under extremely permissive terms.
|
||||
|
||||
The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
|
||||
The Android Open Source Project and is licensed under a two-clause BSD
|
||||
license. It was taken from Bionic libc, used on Android.
|
||||
|
||||
The implementation of DES for crypt (src/crypt/crypt_des.c) is
|
||||
Copyright © 1994 David Burren. It is licensed under a BSD license.
|
||||
|
||||
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
|
||||
originally written by Solar Designer and placed into the public
|
||||
domain. The code also comes with a fallback permissive license for use
|
||||
in jurisdictions that may not recognize the public domain.
|
||||
|
||||
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
|
||||
Valentin Ochs and is licensed under an MIT-style license.
|
||||
|
||||
The BSD PRNG implementation (src/prng/random.c) and XSI search API
|
||||
(src/search/*.c) functions are Copyright © 2011 Szabolcs Nagy and
|
||||
licensed under following terms: "Permission to use, copy, modify,
|
||||
and/or distribute this code for any purpose with or without fee is
|
||||
hereby granted. There is no warranty."
|
||||
|
||||
The x86_64 port was written by Nicholas J. Kain and is licensed under
|
||||
the standard MIT terms.
|
||||
|
||||
The mips and microblaze ports were originally written by Richard
|
||||
Pennington for use in the ellcc project. The original code was adapted
|
||||
by Rich Felker for build system and code conventions during upstream
|
||||
integration. It is licensed under the standard MIT terms.
|
||||
|
||||
The mips64 port was contributed by Imagination Technologies and is
|
||||
licensed under the standard MIT terms.
|
||||
|
||||
The powerpc port was also originally written by Richard Pennington,
|
||||
and later supplemented and integrated by John Spencer. It is licensed
|
||||
under the standard MIT terms.
|
||||
|
||||
All other files which have no copyright comments are original works
|
||||
produced specifically for use as part of this library, written either
|
||||
by Rich Felker, the main author of the library, or by one or more
|
||||
contibutors listed above. Details on authorship of individual files
|
||||
can be found in the git version control history of the project. The
|
||||
omission of copyright and license comments in each file is in the
|
||||
interest of source tree size.
|
||||
|
||||
In addition, permission is hereby granted for all public header files
|
||||
(include/* and arch/*/bits/*) and crt files intended to be linked into
|
||||
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
|
||||
the copyright notice and permission notice otherwise required by the
|
||||
license, and to use these files without any requirement of
|
||||
attribution. These files include substantial contributions from:
|
||||
|
||||
Bobby Bingham
|
||||
John Spencer
|
||||
Nicholas J. Kain
|
||||
Rich Felker
|
||||
Richard Pennington
|
||||
Stefan Kristiansson
|
||||
Szabolcs Nagy
|
||||
|
||||
all of whom have explicitly granted such permission.
|
||||
|
||||
This file previously contained text expressing a belief that most of
|
||||
the files covered by the above exception were sufficiently trivial not
|
||||
to be subject to copyright, resulting in confusion over whether it
|
||||
negated the permissions granted in the license. In the spirit of
|
||||
permissive licensing, and of not having licensing issues being an
|
||||
obstacle to adoption, that text has been removed.
|
||||
8
be/src/glibc-compatibility/musl/README
Normal file
8
be/src/glibc-compatibility/musl/README
Normal file
@ -0,0 +1,8 @@
|
||||
Tiny pieces extracted from MUSL library.
|
||||
|
||||
git://git.musl-libc.org/musl
|
||||
c10bc61508dc52b8315084e628f36a6c3c2dabb1
|
||||
|
||||
NOTE: Files was edited.
|
||||
|
||||
NOTE: Math related files are pulled from commit 6ad514e4e278f0c3b18eb2db1d45638c9af1c07f.
|
||||
6
be/src/glibc-compatibility/musl/__math_divzero.c
Normal file
6
be/src/glibc-compatibility/musl/__math_divzero.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
double __math_divzero(uint32_t sign)
|
||||
{
|
||||
return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_divzerof.c
Normal file
6
be/src/glibc-compatibility/musl/__math_divzerof.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
float __math_divzerof(uint32_t sign)
|
||||
{
|
||||
return fp_barrierf(sign ? -1.0f : 1.0f) / 0.0f;
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_invalid.c
Normal file
6
be/src/glibc-compatibility/musl/__math_invalid.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
double __math_invalid(double x)
|
||||
{
|
||||
return (x - x) / (x - x);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_invalidf.c
Normal file
6
be/src/glibc-compatibility/musl/__math_invalidf.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
float __math_invalidf(float x)
|
||||
{
|
||||
return (x - x) / (x - x);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_oflow.c
Normal file
6
be/src/glibc-compatibility/musl/__math_oflow.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
double __math_oflow(uint32_t sign)
|
||||
{
|
||||
return __math_xflow(sign, 0x1p769);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_oflowf.c
Normal file
6
be/src/glibc-compatibility/musl/__math_oflowf.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
float __math_oflowf(uint32_t sign)
|
||||
{
|
||||
return __math_xflowf(sign, 0x1p97f);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_uflow.c
Normal file
6
be/src/glibc-compatibility/musl/__math_uflow.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
double __math_uflow(uint32_t sign)
|
||||
{
|
||||
return __math_xflow(sign, 0x1p-767);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_uflowf.c
Normal file
6
be/src/glibc-compatibility/musl/__math_uflowf.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
float __math_uflowf(uint32_t sign)
|
||||
{
|
||||
return __math_xflowf(sign, 0x1p-95f);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_xflow.c
Normal file
6
be/src/glibc-compatibility/musl/__math_xflow.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
double __math_xflow(uint32_t sign, double y)
|
||||
{
|
||||
return eval_as_double(fp_barrier(sign ? -y : y) * y);
|
||||
}
|
||||
6
be/src/glibc-compatibility/musl/__math_xflowf.c
Normal file
6
be/src/glibc-compatibility/musl/__math_xflowf.c
Normal file
@ -0,0 +1,6 @@
|
||||
#include "libm.h"
|
||||
|
||||
float __math_xflowf(uint32_t sign, float y)
|
||||
{
|
||||
return eval_as_float(fp_barrierf(sign ? -y : y) * y);
|
||||
}
|
||||
82
be/src/glibc-compatibility/musl/aarch64/atomic_arch.h
Normal file
82
be/src/glibc-compatibility/musl/aarch64/atomic_arch.h
Normal file
@ -0,0 +1,82 @@
|
||||
#define a_ll a_ll
|
||||
static inline int a_ll(volatile int *p)
|
||||
{
|
||||
int v;
|
||||
__asm__ __volatile__ ("ldaxr %w0,%1" : "=r"(v) : "Q"(*p));
|
||||
return v;
|
||||
}
|
||||
|
||||
#define a_sc a_sc
|
||||
static inline int a_sc(volatile int *p, int v)
|
||||
{
|
||||
int r;
|
||||
__asm__ __volatile__ ("stlxr %w0,%w2,%1" : "=&r"(r), "=Q"(*p) : "r"(v) : "memory");
|
||||
return !r;
|
||||
}
|
||||
|
||||
#define a_barrier a_barrier
|
||||
static inline void a_barrier()
|
||||
{
|
||||
__asm__ __volatile__ ("dmb ish" : : : "memory");
|
||||
}
|
||||
|
||||
#define a_cas a_cas
|
||||
static inline int a_cas(volatile int *p, int t, int s)
|
||||
{
|
||||
int old;
|
||||
do {
|
||||
old = a_ll(p);
|
||||
if (old != t) {
|
||||
a_barrier();
|
||||
break;
|
||||
}
|
||||
} while (!a_sc(p, s));
|
||||
return old;
|
||||
}
|
||||
|
||||
#define a_ll_p a_ll_p
|
||||
static inline void *a_ll_p(volatile void *p)
|
||||
{
|
||||
void *v;
|
||||
__asm__ __volatile__ ("ldaxr %0, %1" : "=r"(v) : "Q"(*(void *volatile *)p));
|
||||
return v;
|
||||
}
|
||||
|
||||
#define a_sc_p a_sc_p
|
||||
static inline int a_sc_p(volatile int *p, void *v)
|
||||
{
|
||||
int r;
|
||||
__asm__ __volatile__ ("stlxr %w0,%2,%1" : "=&r"(r), "=Q"(*(void *volatile *)p) : "r"(v) : "memory");
|
||||
return !r;
|
||||
}
|
||||
|
||||
#define a_cas_p a_cas_p
|
||||
static inline void *a_cas_p(volatile void *p, void *t, void *s)
|
||||
{
|
||||
void *old;
|
||||
do {
|
||||
old = a_ll_p(p);
|
||||
if (old != t) {
|
||||
a_barrier();
|
||||
break;
|
||||
}
|
||||
} while (!a_sc_p(p, s));
|
||||
return old;
|
||||
}
|
||||
|
||||
#define a_ctz_64 a_ctz_64
|
||||
static inline int a_ctz_64(uint64_t x)
|
||||
{
|
||||
__asm__(
|
||||
" rbit %0, %1\n"
|
||||
" clz %0, %0\n"
|
||||
: "=r"(x) : "r"(x));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define a_clz_64 a_clz_64
|
||||
static inline int a_clz_64(uint64_t x)
|
||||
{
|
||||
__asm__("clz %0, %1" : "=r"(x) : "r"(x));
|
||||
return x;
|
||||
}
|
||||
21
be/src/glibc-compatibility/musl/aarch64/longjmp.s
Normal file
21
be/src/glibc-compatibility/musl/aarch64/longjmp.s
Normal file
@ -0,0 +1,21 @@
|
||||
.global musl_glibc_longjmp
|
||||
.type musl_glibc_longjmp,@function
|
||||
musl_glibc_longjmp:
|
||||
// IHI0055B_aapcs64.pdf 5.1.1, 5.1.2 callee saved registers
|
||||
ldp x19, x20, [x0,#0]
|
||||
ldp x21, x22, [x0,#16]
|
||||
ldp x23, x24, [x0,#32]
|
||||
ldp x25, x26, [x0,#48]
|
||||
ldp x27, x28, [x0,#64]
|
||||
ldp x29, x30, [x0,#80]
|
||||
ldr x2, [x0,#104]
|
||||
mov sp, x2
|
||||
ldp d8 , d9, [x0,#112]
|
||||
ldp d10, d11, [x0,#128]
|
||||
ldp d12, d13, [x0,#144]
|
||||
ldp d14, d15, [x0,#160]
|
||||
|
||||
mov x0, x1
|
||||
cbnz x1, 1f
|
||||
mov x0, #1
|
||||
1: br x30
|
||||
14
be/src/glibc-compatibility/musl/aarch64/syscall.s
Normal file
14
be/src/glibc-compatibility/musl/aarch64/syscall.s
Normal file
@ -0,0 +1,14 @@
|
||||
.global __syscall
|
||||
.hidden __syscall
|
||||
.type __syscall,%function
|
||||
__syscall:
|
||||
uxtw x8,w0
|
||||
mov x0,x1
|
||||
mov x1,x2
|
||||
mov x2,x3
|
||||
mov x3,x4
|
||||
mov x4,x5
|
||||
mov x5,x6
|
||||
mov x6,x7
|
||||
svc 0
|
||||
ret
|
||||
3
be/src/glibc-compatibility/musl/aarch64/syscall_arch.h
Normal file
3
be/src/glibc-compatibility/musl/aarch64/syscall_arch.h
Normal file
@ -0,0 +1,3 @@
|
||||
#define VDSO_USEFUL
|
||||
#define VDSO_CGT_SYM "__kernel_clock_gettime"
|
||||
#define VDSO_CGT_VER "LINUX_2.6.39"
|
||||
318
be/src/glibc-compatibility/musl/atomic.h
Normal file
318
be/src/glibc-compatibility/musl/atomic.h
Normal file
@ -0,0 +1,318 @@
|
||||
#ifndef _ATOMIC_H
|
||||
#define _ATOMIC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "atomic_arch.h"
|
||||
|
||||
#ifdef a_ll
|
||||
|
||||
#ifndef a_pre_llsc
|
||||
#define a_pre_llsc()
|
||||
#endif
|
||||
|
||||
#ifndef a_post_llsc
|
||||
#define a_post_llsc()
|
||||
#endif
|
||||
|
||||
#ifndef a_cas
|
||||
#define a_cas a_cas
|
||||
static inline int a_cas(volatile int *p, int t, int s)
|
||||
{
|
||||
int old;
|
||||
a_pre_llsc();
|
||||
do old = a_ll(p);
|
||||
while (old==t && !a_sc(p, s));
|
||||
a_post_llsc();
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_swap
|
||||
#define a_swap a_swap
|
||||
static inline int a_swap(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
a_pre_llsc();
|
||||
do old = a_ll(p);
|
||||
while (!a_sc(p, v));
|
||||
a_post_llsc();
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_fetch_add
|
||||
#define a_fetch_add a_fetch_add
|
||||
static inline int a_fetch_add(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
a_pre_llsc();
|
||||
do old = a_ll(p);
|
||||
while (!a_sc(p, (unsigned)old + v));
|
||||
a_post_llsc();
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_fetch_and
|
||||
#define a_fetch_and a_fetch_and
|
||||
static inline int a_fetch_and(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
a_pre_llsc();
|
||||
do old = a_ll(p);
|
||||
while (!a_sc(p, old & v));
|
||||
a_post_llsc();
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_fetch_or
|
||||
#define a_fetch_or a_fetch_or
|
||||
static inline int a_fetch_or(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
a_pre_llsc();
|
||||
do old = a_ll(p);
|
||||
while (!a_sc(p, old | v));
|
||||
a_post_llsc();
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef a_ll_p
|
||||
|
||||
#ifndef a_cas_p
|
||||
#define a_cas_p a_cas_p
|
||||
static inline void *a_cas_p(volatile void *p, void *t, void *s)
|
||||
{
|
||||
void *old;
|
||||
a_pre_llsc();
|
||||
do old = a_ll_p(p);
|
||||
while (old==t && !a_sc_p(p, s));
|
||||
a_post_llsc();
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef a_cas
|
||||
#error missing definition of a_cas
|
||||
#endif
|
||||
|
||||
#ifndef a_swap
|
||||
#define a_swap a_swap
|
||||
static inline int a_swap(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
do old = *p;
|
||||
while (a_cas(p, old, v) != old);
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_fetch_add
|
||||
#define a_fetch_add a_fetch_add
|
||||
static inline int a_fetch_add(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
do old = *p;
|
||||
while (a_cas(p, old, (unsigned)old+v) != old);
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_fetch_and
|
||||
#define a_fetch_and a_fetch_and
|
||||
static inline int a_fetch_and(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
do old = *p;
|
||||
while (a_cas(p, old, old&v) != old);
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
#ifndef a_fetch_or
|
||||
#define a_fetch_or a_fetch_or
|
||||
static inline int a_fetch_or(volatile int *p, int v)
|
||||
{
|
||||
int old;
|
||||
do old = *p;
|
||||
while (a_cas(p, old, old|v) != old);
|
||||
return old;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_and
|
||||
#define a_and a_and
|
||||
static inline void a_and(volatile int *p, int v)
|
||||
{
|
||||
a_fetch_and(p, v);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_or
|
||||
#define a_or a_or
|
||||
static inline void a_or(volatile int *p, int v)
|
||||
{
|
||||
a_fetch_or(p, v);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_inc
|
||||
#define a_inc a_inc
|
||||
static inline void a_inc(volatile int *p)
|
||||
{
|
||||
a_fetch_add(p, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_dec
|
||||
#define a_dec a_dec
|
||||
static inline void a_dec(volatile int *p)
|
||||
{
|
||||
a_fetch_add(p, -1);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_store
|
||||
#define a_store a_store
|
||||
static inline void a_store(volatile int *p, int v)
|
||||
{
|
||||
#ifdef a_barrier
|
||||
a_barrier();
|
||||
*p = v;
|
||||
a_barrier();
|
||||
#else
|
||||
a_swap(p, v);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_barrier
|
||||
#define a_barrier a_barrier
|
||||
static void a_barrier()
|
||||
{
|
||||
volatile int tmp = 0;
|
||||
a_cas(&tmp, 0, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_spin
|
||||
#define a_spin a_barrier
|
||||
#endif
|
||||
|
||||
#ifndef a_and_64
|
||||
#define a_and_64 a_and_64
|
||||
static inline void a_and_64(volatile uint64_t *p, uint64_t v)
|
||||
{
|
||||
union { uint64_t v; uint32_t r[2]; } u = { v };
|
||||
if (u.r[0]+1) a_and((int *)p, u.r[0]);
|
||||
if (u.r[1]+1) a_and((int *)p+1, u.r[1]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_or_64
|
||||
#define a_or_64 a_or_64
|
||||
static inline void a_or_64(volatile uint64_t *p, uint64_t v)
|
||||
{
|
||||
union { uint64_t v; uint32_t r[2]; } u = { v };
|
||||
if (u.r[0]) a_or((int *)p, u.r[0]);
|
||||
if (u.r[1]) a_or((int *)p+1, u.r[1]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_cas_p
|
||||
typedef char a_cas_p_undefined_but_pointer_not_32bit[-sizeof(char) == 0xffffffff ? 1 : -1];
|
||||
#define a_cas_p a_cas_p
|
||||
static inline void *a_cas_p(volatile void *p, void *t, void *s)
|
||||
{
|
||||
return (void *)a_cas((volatile int *)p, (int)t, (int)s);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_or_l
|
||||
#define a_or_l a_or_l
|
||||
static inline void a_or_l(volatile void *p, long v)
|
||||
{
|
||||
if (sizeof(long) == sizeof(int)) a_or(p, v);
|
||||
else a_or_64(p, v);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_crash
|
||||
#define a_crash a_crash
|
||||
static inline void a_crash()
|
||||
{
|
||||
*(volatile char *)0=0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_ctz_32
|
||||
#define a_ctz_32 a_ctz_32
|
||||
static inline int a_ctz_32(uint32_t x)
|
||||
{
|
||||
#ifdef a_clz_32
|
||||
return 31-a_clz_32(x&-x);
|
||||
#else
|
||||
static const char debruijn32[32] = {
|
||||
0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
|
||||
31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
|
||||
};
|
||||
return debruijn32[(x&-x)*0x076be629 >> 27];
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef a_ctz_64
|
||||
#define a_ctz_64 a_ctz_64
|
||||
static inline int a_ctz_64(uint64_t x)
|
||||
{
|
||||
static const char debruijn64[64] = {
|
||||
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
|
||||
62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
|
||||
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
|
||||
51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
|
||||
};
|
||||
if (sizeof(long) < 8) {
|
||||
uint32_t y = x;
|
||||
if (!y) {
|
||||
y = x>>32;
|
||||
return 32 + a_ctz_32(y);
|
||||
}
|
||||
return a_ctz_32(y);
|
||||
}
|
||||
return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int a_ctz_l(unsigned long x)
|
||||
{
|
||||
return (sizeof(long) < 8) ? a_ctz_32(x) : a_ctz_64(x);
|
||||
}
|
||||
|
||||
#ifndef a_clz_64
|
||||
#define a_clz_64 a_clz_64
|
||||
static inline int a_clz_64(uint64_t x)
|
||||
{
|
||||
#ifdef a_clz_32
|
||||
if (x>>32)
|
||||
return a_clz_32(x>>32);
|
||||
return a_clz_32(x) + 32;
|
||||
#else
|
||||
uint32_t y;
|
||||
int r;
|
||||
if (x>>32) y=x>>32, r=0; else y=x, r=32;
|
||||
if (y>>16) y>>=16; else r |= 16;
|
||||
if (y>>8) y>>=8; else r |= 8;
|
||||
if (y>>4) y>>=4; else r |= 4;
|
||||
if (y>>2) y>>=2; else r |= 2;
|
||||
return r | !(y>>1);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
22
be/src/glibc-compatibility/musl/clock_getres.c
Normal file
22
be/src/glibc-compatibility/musl/clock_getres.c
Normal file
@ -0,0 +1,22 @@
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int clock_getres(clockid_t clk, struct timespec *ts)
|
||||
{
|
||||
#ifdef SYS_clock_getres_time64
|
||||
/* On a 32-bit arch, use the old syscall if it exists. */
|
||||
if (SYS_clock_getres != SYS_clock_getres_time64) {
|
||||
long ts32[2];
|
||||
int r = __syscall(SYS_clock_getres, clk, ts32);
|
||||
if (!r && ts) {
|
||||
ts->tv_sec = ts32[0];
|
||||
ts->tv_nsec = ts32[1];
|
||||
}
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
#endif
|
||||
/* If reaching this point, it's a 64-bit arch or time64-only
|
||||
* 32-bit arch and we can get result directly into timespec. */
|
||||
return syscall(SYS_clock_getres, clk, ts);
|
||||
}
|
||||
105
be/src/glibc-compatibility/musl/clock_gettime.c
Normal file
105
be/src/glibc-compatibility/musl/clock_gettime.c
Normal file
@ -0,0 +1,105 @@
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include "atomic.h"
|
||||
#include "syscall.h"
|
||||
|
||||
#ifdef VDSO_CGT_SYM
|
||||
|
||||
static void *volatile vdso_func;
|
||||
|
||||
#ifdef VDSO_CGT32_SYM
|
||||
static void *volatile vdso_func_32;
|
||||
static int cgt_time32_wrap(clockid_t clk, struct timespec *ts)
|
||||
{
|
||||
long ts32[2];
|
||||
int (*f)(clockid_t, long[2]) =
|
||||
(int (*)(clockid_t, long[2]))vdso_func_32;
|
||||
int r = f(clk, ts32);
|
||||
if (!r) {
|
||||
/* Fallback to syscalls if time32 overflowed. Maybe
|
||||
* we lucked out and somehow migrated to a kernel with
|
||||
* time64 syscalls available. */
|
||||
if (ts32[0] < 0) {
|
||||
a_cas_p(&vdso_func, (void *)cgt_time32_wrap, 0);
|
||||
return -ENOSYS;
|
||||
}
|
||||
ts->tv_sec = ts32[0];
|
||||
ts->tv_nsec = ts32[1];
|
||||
}
|
||||
return r;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int cgt_init(clockid_t clk, struct timespec *ts)
|
||||
{
|
||||
void *p = __vdsosym(VDSO_CGT_VER, VDSO_CGT_SYM);
|
||||
#ifdef VDSO_CGT32_SYM
|
||||
if (!p) {
|
||||
void *q = __vdsosym(VDSO_CGT32_VER, VDSO_CGT32_SYM);
|
||||
if (q) {
|
||||
a_cas_p(&vdso_func_32, 0, q);
|
||||
p = cgt_time32_wrap;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
int (*f)(clockid_t, struct timespec *) =
|
||||
(int (*)(clockid_t, struct timespec *))p;
|
||||
a_cas_p(&vdso_func, (void *)cgt_init, p);
|
||||
return f ? f(clk, ts) : -ENOSYS;
|
||||
}
|
||||
|
||||
static void *volatile vdso_func = (void *)cgt_init;
|
||||
|
||||
#endif
|
||||
|
||||
int clock_gettime(clockid_t clk, struct timespec *ts)
|
||||
{
|
||||
int r;
|
||||
|
||||
#ifdef VDSO_CGT_SYM
|
||||
int (*f)(clockid_t, struct timespec *) =
|
||||
(int (*)(clockid_t, struct timespec *))vdso_func;
|
||||
if (f) {
|
||||
r = f(clk, ts);
|
||||
if (!r) return r;
|
||||
if (r == -EINVAL) return __syscall_ret(r);
|
||||
/* Fall through on errors other than EINVAL. Some buggy
|
||||
* vdso implementations return ENOSYS for clocks they
|
||||
* can't handle, rather than making the syscall. This
|
||||
* also handles the case where cgt_init fails to find
|
||||
* a vdso function to use. */
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SYS_clock_gettime64
|
||||
r = -ENOSYS;
|
||||
if (sizeof(time_t) > 4)
|
||||
r = __syscall(SYS_clock_gettime64, clk, ts);
|
||||
if (SYS_clock_gettime == SYS_clock_gettime64 || r!=-ENOSYS)
|
||||
return __syscall_ret(r);
|
||||
long ts32[2];
|
||||
r = __syscall(SYS_clock_gettime, clk, ts32);
|
||||
if (r==-ENOSYS && clk==CLOCK_REALTIME) {
|
||||
r = __syscall(SYS_gettimeofday, ts32, 0);
|
||||
ts32[1] *= 1000;
|
||||
}
|
||||
if (!r) {
|
||||
ts->tv_sec = ts32[0];
|
||||
ts->tv_nsec = ts32[1];
|
||||
return r;
|
||||
}
|
||||
return __syscall_ret(r);
|
||||
#else
|
||||
r = __syscall(SYS_clock_gettime, clk, ts);
|
||||
if (r == -ENOSYS) {
|
||||
if (clk == CLOCK_REALTIME) {
|
||||
__syscall(SYS_gettimeofday, ts, 0);
|
||||
ts->tv_nsec = (int)ts->tv_nsec * 1000;
|
||||
return 0;
|
||||
}
|
||||
r = -EINVAL;
|
||||
}
|
||||
return __syscall_ret(r);
|
||||
#endif
|
||||
}
|
||||
24
be/src/glibc-compatibility/musl/clock_nanosleep.c
Normal file
24
be/src/glibc-compatibility/musl/clock_nanosleep.c
Normal file
@ -0,0 +1,24 @@
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <time.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int clock_nanosleep(clockid_t clk, int flags, const struct timespec * req, struct timespec * rem)
|
||||
{
|
||||
if (clk == CLOCK_THREAD_CPUTIME_ID)
|
||||
return EINVAL;
|
||||
int old_cancel_type;
|
||||
int status;
|
||||
/// We cannot port __syscall_cp because musl has very limited cancellation point implementation.
|
||||
/// For example, c++ destructors won't get called and exception unwinding isn't implemented.
|
||||
/// Instead, we use normal __syscall here and turn on the asynchrous cancel mode to allow
|
||||
/// cancel. This works because nanosleep doesn't contain any resource allocations or
|
||||
/// deallocations.
|
||||
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_cancel_type);
|
||||
if (clk == CLOCK_REALTIME && !flags)
|
||||
status = -__syscall(SYS_nanosleep, req, rem);
|
||||
else
|
||||
status = -__syscall(SYS_clock_nanosleep, clk, flags, req, rem);
|
||||
pthread_setcanceltype(old_cancel_type, NULL);
|
||||
return status;
|
||||
}
|
||||
134
be/src/glibc-compatibility/musl/exp.c
Normal file
134
be/src/glibc-compatibility/musl/exp.c
Normal file
@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Double-precision e^x function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp_data.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
#define InvLn2N __exp_data.invln2N
|
||||
#define NegLn2hiN __exp_data.negln2hiN
|
||||
#define NegLn2loN __exp_data.negln2loN
|
||||
#define Shift __exp_data.shift
|
||||
#define T __exp_data.tab
|
||||
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
|
||||
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
|
||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||
scale is in SBITS, however it has a computed exponent that may have
|
||||
overflown into the sign bit so that needs to be adjusted before using it as
|
||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||
adjustment of scale, positive k here means the result may overflow and
|
||||
negative k means the result may underflow. */
|
||||
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
||||
{
|
||||
double_t scale, y;
|
||||
|
||||
if ((ki & 0x80000000) == 0) {
|
||||
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||
sbits -= 1009ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = 0x1p1009 * (scale + scale * tmp);
|
||||
return eval_as_double(y);
|
||||
}
|
||||
/* k < 0, need special care in the subnormal range. */
|
||||
sbits += 1022ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = scale + scale * tmp;
|
||||
if (y < 1.0) {
|
||||
/* Round y to the right precision before scaling it into the subnormal
|
||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||
E is the worst-case ulp error outside the subnormal range. So this
|
||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||
double_t hi, lo;
|
||||
lo = scale - y + scale * tmp;
|
||||
hi = 1.0 + y;
|
||||
lo = 1.0 - hi + y + lo;
|
||||
y = eval_as_double(hi + lo) - 1.0;
|
||||
/* Avoid -0.0 with downward rounding. */
|
||||
if (WANT_ROUNDING && y == 0.0)
|
||||
y = 0.0;
|
||||
/* The underflow exception needs to be signaled explicitly. */
|
||||
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
||||
}
|
||||
y = 0x1p-1022 * y;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
|
||||
/* Top 12 bits of a double (sign and exponent bits). */
|
||||
static inline uint32_t top12(double x)
|
||||
{
|
||||
return asuint64(x) >> 52;
|
||||
}
|
||||
|
||||
double exp(double x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
double_t kd, z, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12(x) & 0x7ff;
|
||||
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
|
||||
if (abstop - top12(0x1p-54) >= 0x80000000)
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
if (abstop >= top12(1024.0)) {
|
||||
if (asuint64(x) == asuint64(-INFINITY))
|
||||
return 0.0;
|
||||
if (abstop >= top12(INFINITY))
|
||||
return 1.0 + x;
|
||||
if (asuint64(x) >> 63)
|
||||
return __math_uflow(0);
|
||||
else
|
||||
return __math_oflow(0);
|
||||
}
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||
z = InvLn2N * x;
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint(z);
|
||||
ki = converttoint(z);
|
||||
#elif EXP_USE_TOINT_NARROW
|
||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||
kd = eval_as_double(z + Shift);
|
||||
ki = asuint64(kd) >> 16;
|
||||
kd = (double_t)(int32_t)ki;
|
||||
#else
|
||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||
kd = eval_as_double(z + Shift);
|
||||
ki = asuint64(kd);
|
||||
kd -= Shift;
|
||||
#endif
|
||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = ki << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble(T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
if (predict_false(abstop == 0))
|
||||
return specialcase(tmp, sbits, ki);
|
||||
scale = asdouble(sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double(scale + scale * tmp);
|
||||
}
|
||||
121
be/src/glibc-compatibility/musl/exp2.c
Normal file
121
be/src/glibc-compatibility/musl/exp2.c
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Double-precision 2^x function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp_data.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
#define Shift __exp_data.exp2_shift
|
||||
#define T __exp_data.tab
|
||||
#define C1 __exp_data.exp2_poly[0]
|
||||
#define C2 __exp_data.exp2_poly[1]
|
||||
#define C3 __exp_data.exp2_poly[2]
|
||||
#define C4 __exp_data.exp2_poly[3]
|
||||
#define C5 __exp_data.exp2_poly[4]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
|
||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||
scale is in SBITS, however it has a computed exponent that may have
|
||||
overflown into the sign bit so that needs to be adjusted before using it as
|
||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||
adjustment of scale, positive k here means the result may overflow and
|
||||
negative k means the result may underflow. */
|
||||
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
||||
{
|
||||
double_t scale, y;
|
||||
|
||||
if ((ki & 0x80000000) == 0) {
|
||||
/* k > 0, the exponent of scale might have overflowed by 1. */
|
||||
sbits -= 1ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = 2 * (scale + scale * tmp);
|
||||
return eval_as_double(y);
|
||||
}
|
||||
/* k < 0, need special care in the subnormal range. */
|
||||
sbits += 1022ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = scale + scale * tmp;
|
||||
if (y < 1.0) {
|
||||
/* Round y to the right precision before scaling it into the subnormal
|
||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||
E is the worst-case ulp error outside the subnormal range. So this
|
||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||
double_t hi, lo;
|
||||
lo = scale - y + scale * tmp;
|
||||
hi = 1.0 + y;
|
||||
lo = 1.0 - hi + y + lo;
|
||||
y = eval_as_double(hi + lo) - 1.0;
|
||||
/* Avoid -0.0 with downward rounding. */
|
||||
if (WANT_ROUNDING && y == 0.0)
|
||||
y = 0.0;
|
||||
/* The underflow exception needs to be signaled explicitly. */
|
||||
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
||||
}
|
||||
y = 0x1p-1022 * y;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
|
||||
/* Top 12 bits of a double (sign and exponent bits). */
|
||||
static inline uint32_t top12(double x)
|
||||
{
|
||||
return asuint64(x) >> 52;
|
||||
}
|
||||
|
||||
double exp2(double x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
double_t kd, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12(x) & 0x7ff;
|
||||
if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
|
||||
if (abstop - top12(0x1p-54) >= 0x80000000)
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
if (abstop >= top12(1024.0)) {
|
||||
if (asuint64(x) == asuint64(-INFINITY))
|
||||
return 0.0;
|
||||
if (abstop >= top12(INFINITY))
|
||||
return 1.0 + x;
|
||||
if (!(asuint64(x) >> 63))
|
||||
return __math_oflow(0);
|
||||
else if (asuint64(x) >= asuint64(-1075.0))
|
||||
return __math_uflow(0);
|
||||
}
|
||||
if (2 * asuint64(x) > 2 * asuint64(928.0))
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
|
||||
kd = eval_as_double(x + Shift);
|
||||
ki = asuint64(kd); /* k. */
|
||||
kd -= Shift; /* k/N for int k. */
|
||||
r = x - kd;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = ki << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble(T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.5/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
|
||||
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
if (predict_false(abstop == 0))
|
||||
return specialcase(tmp, sbits, ki);
|
||||
scale = asdouble(sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double(scale + scale * tmp);
|
||||
}
|
||||
69
be/src/glibc-compatibility/musl/exp2f.c
Normal file
69
be/src/glibc-compatibility/musl/exp2f.c
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Single-precision 2^x function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp2f_data.h"
|
||||
|
||||
/*
|
||||
EXP2F_TABLE_BITS = 5
|
||||
EXP2F_POLY_ORDER = 3
|
||||
|
||||
ULP error: 0.502 (nearest rounding.)
|
||||
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
|
||||
Wrong count: 168353 (all nearest rounding wrong results with fma.)
|
||||
Non-nearest ULP error: 1 (rounded ULP error)
|
||||
*/
|
||||
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define T __exp2f_data.tab
|
||||
#define C __exp2f_data.poly
|
||||
#define SHIFT __exp2f_data.shift_scaled
|
||||
|
||||
static inline uint32_t top12(float x)
|
||||
{
|
||||
return asuint(x) >> 20;
|
||||
}
|
||||
|
||||
float exp2f(float x)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, t;
|
||||
double_t kd, xd, z, r, r2, y, s;
|
||||
|
||||
xd = (double_t)x;
|
||||
abstop = top12(x) & 0x7ff;
|
||||
if (predict_false(abstop >= top12(128.0f))) {
|
||||
/* |x| >= 128 or x is nan. */
|
||||
if (asuint(x) == asuint(-INFINITY))
|
||||
return 0.0f;
|
||||
if (abstop >= top12(INFINITY))
|
||||
return x + x;
|
||||
if (x > 0.0f)
|
||||
return __math_oflowf(0);
|
||||
if (x <= -150.0f)
|
||||
return __math_uflowf(0);
|
||||
}
|
||||
|
||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
|
||||
kd = eval_as_double(xd + SHIFT);
|
||||
ki = asuint64(kd);
|
||||
kd -= SHIFT; /* k/N for int k. */
|
||||
r = xd - kd;
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
t += ki << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble(t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float(y);
|
||||
}
|
||||
35
be/src/glibc-compatibility/musl/exp2f_data.c
Normal file
35
be/src/glibc-compatibility/musl/exp2f_data.c
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Shared data between expf, exp2f and powf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "exp2f_data.h"
|
||||
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
|
||||
const struct exp2f_data __exp2f_data = {
|
||||
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
|
||||
used for computing 2^(k/N) for an int |k| < 150 N as
|
||||
double(tab[k%N] + (k << 52-BITS)) */
|
||||
.tab = {
|
||||
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
|
||||
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
|
||||
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
|
||||
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
|
||||
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
|
||||
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
|
||||
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
|
||||
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
|
||||
},
|
||||
.shift_scaled = 0x1.8p+52 / N,
|
||||
.poly = {
|
||||
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
|
||||
},
|
||||
.shift = 0x1.8p+52,
|
||||
.invln2_scaled = 0x1.71547652b82fep+0 * N,
|
||||
.poly_scaled = {
|
||||
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
|
||||
},
|
||||
};
|
||||
23
be/src/glibc-compatibility/musl/exp2f_data.h
Normal file
23
be/src/glibc-compatibility/musl/exp2f_data.h
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _EXP2F_DATA_H
|
||||
#define _EXP2F_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
/* Shared between expf, exp2f and powf. */
|
||||
#define EXP2F_TABLE_BITS 5
|
||||
#define EXP2F_POLY_ORDER 3
|
||||
extern hidden const struct exp2f_data {
|
||||
uint64_t tab[1 << EXP2F_TABLE_BITS];
|
||||
double shift_scaled;
|
||||
double poly[EXP2F_POLY_ORDER];
|
||||
double shift;
|
||||
double invln2_scaled;
|
||||
double poly_scaled[EXP2F_POLY_ORDER];
|
||||
} __exp2f_data;
|
||||
|
||||
#endif
|
||||
182
be/src/glibc-compatibility/musl/exp_data.c
Normal file
182
be/src/glibc-compatibility/musl/exp_data.c
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
* Shared data between exp, exp2 and pow.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "exp_data.h"
|
||||
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
|
||||
const struct exp_data __exp_data = {
|
||||
// N/ln2
|
||||
.invln2N = 0x1.71547652b82fep0 * N,
|
||||
// -ln2/N
|
||||
.negln2hiN = -0x1.62e42fefa0000p-8,
|
||||
.negln2loN = -0x1.cf79abc9e3b3ap-47,
|
||||
// Used for rounding when !TOINT_INTRINSICS
|
||||
#if EXP_USE_TOINT_NARROW
|
||||
.shift = 0x1800000000.8p0,
|
||||
#else
|
||||
.shift = 0x1.8p52,
|
||||
#endif
|
||||
// exp polynomial coefficients.
|
||||
.poly = {
|
||||
// abs error: 1.555*2^-66
|
||||
// ulp error: 0.509 (0.511 without fma)
|
||||
// if |x| < ln2/256+eps
|
||||
// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
|
||||
// abs error if |x| < ln2/128: 1.7145*2^-56
|
||||
0x1.ffffffffffdbdp-2,
|
||||
0x1.555555555543cp-3,
|
||||
0x1.55555cf172b91p-5,
|
||||
0x1.1111167a4d017p-7,
|
||||
},
|
||||
.exp2_shift = 0x1.8p52 / N,
|
||||
// exp2 polynomial coefficients.
|
||||
.exp2_poly = {
|
||||
// abs error: 1.2195*2^-65
|
||||
// ulp error: 0.507 (0.511 without fma)
|
||||
// if |x| < 1/256
|
||||
// abs error if |x| < 1/128: 1.9941*2^-56
|
||||
0x1.62e42fefa39efp-1,
|
||||
0x1.ebfbdff82c424p-3,
|
||||
0x1.c6b08d70cf4b5p-5,
|
||||
0x1.3b2abd24650ccp-7,
|
||||
0x1.5d7e09b4e3a84p-10,
|
||||
},
|
||||
// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
|
||||
// tab[2*k] = asuint64(T[k])
|
||||
// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
|
||||
.tab = {
|
||||
0x0, 0x3ff0000000000000,
|
||||
0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
|
||||
0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
|
||||
0xbc905e7a108766d1, 0x3fefe315e86e7f85,
|
||||
0x3c8cd2523567f613, 0x3fefd9b0d3158574,
|
||||
0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
|
||||
0x3c60f74e61e6c861, 0x3fefc74518759bc8,
|
||||
0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
|
||||
0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
|
||||
0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
|
||||
0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
|
||||
0xbc6a033489906e0b, 0x3fef9b66affed31b,
|
||||
0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
|
||||
0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
|
||||
0xbc91c923b9d5f416, 0x3fef829aaea92de0,
|
||||
0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
|
||||
0xbc801b15eaa59348, 0x3fef72b83c7d517b,
|
||||
0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
|
||||
0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
|
||||
0xbc96d99c7611eb26, 0x3fef5be084045cd4,
|
||||
0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
|
||||
0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
|
||||
0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
|
||||
0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
|
||||
0x3c968efde3a8a894, 0x3fef387a6e756238,
|
||||
0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
|
||||
0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
|
||||
0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
|
||||
0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
|
||||
0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
|
||||
0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
|
||||
0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
|
||||
0x3c834d754db0abb6, 0x3fef06fe0a31b715,
|
||||
0x3c864201e2ac744c, 0x3fef0170fc4cd831,
|
||||
0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
|
||||
0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
|
||||
0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
|
||||
0xbc9907f81b512d8e, 0x3feeecae6d05d866,
|
||||
0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
|
||||
0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
|
||||
0x3c859f48a72a4c6d, 0x3feedea64c123422,
|
||||
0xbc9312607a28698a, 0x3feeda4504ac801c,
|
||||
0xbc58a78f4817895b, 0x3feed60a21f72e2a,
|
||||
0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
|
||||
0x3c4363ed60c2ac11, 0x3feece086061892d,
|
||||
0x3c9666093b0664ef, 0x3feeca41ed1d0057,
|
||||
0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
|
||||
0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
|
||||
0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
|
||||
0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
|
||||
0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
|
||||
0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
|
||||
0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
|
||||
0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
|
||||
0x3c93350518fdd78e, 0x3feeaf4736b527da,
|
||||
0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
|
||||
0x3c9063e1e21c5409, 0x3feeab07dd485429,
|
||||
0x3c34c7855019c6ea, 0x3feea9268a5946b7,
|
||||
0x3c9432e62b64c035, 0x3feea76f15ad2148,
|
||||
0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
|
||||
0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
|
||||
0xbc845378892be9ae, 0x3feea34634ccc320,
|
||||
0xbc93cedd78565858, 0x3feea23882552225,
|
||||
0x3c5710aa807e1964, 0x3feea155d44ca973,
|
||||
0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
|
||||
0xbc6a12ad8734b982, 0x3feea012750bdabf,
|
||||
0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
|
||||
0xbc80dc3d54e08851, 0x3fee9f7df9519484,
|
||||
0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
|
||||
0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
|
||||
0xbc8619321e55e68a, 0x3fee9feb564267c9,
|
||||
0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
|
||||
0xbc7b32dcb94da51d, 0x3feea11473eb0187,
|
||||
0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
|
||||
0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
|
||||
0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
|
||||
0xbc9369b6f13b3734, 0x3feea589994cce13,
|
||||
0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
|
||||
0xbc94d450d872576e, 0x3feea8d99b4492ed,
|
||||
0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
|
||||
0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
|
||||
0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
|
||||
0x3c7bf68359f35f44, 0x3feeb1ae99157736,
|
||||
0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
|
||||
0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
|
||||
0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
|
||||
0xbc92434322f4f9aa, 0x3feebd829fde4e50,
|
||||
0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
|
||||
0x3c71affc2b91ce27, 0x3feec49182a3f090,
|
||||
0x3c6dd235e10a73bb, 0x3feec86319e32323,
|
||||
0xbc87c50422622263, 0x3feecc667b5de565,
|
||||
0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
|
||||
0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
|
||||
0x3c90cc319cee31d2, 0x3feed99e1330b358,
|
||||
0x3c8469846e735ab3, 0x3feede6b5579fdbf,
|
||||
0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
|
||||
0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
|
||||
0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
|
||||
0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
|
||||
0xbc90a40e3da6f640, 0x3feef9728de5593a,
|
||||
0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
|
||||
0xbc91eee26b588a35, 0x3fef05b030a1064a,
|
||||
0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
|
||||
0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
|
||||
0x3c736eae30af0cb3, 0x3fef199bdd85529c,
|
||||
0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
|
||||
0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
|
||||
0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
|
||||
0x3c676b2c6c921968, 0x3fef3720dcef9069,
|
||||
0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
|
||||
0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
|
||||
0xbc900dae3875a949, 0x3fef4f87080d89f2,
|
||||
0x3c74a385a63d07a7, 0x3fef5818dcfba487,
|
||||
0xbc82919e2040220f, 0x3fef60e316c98398,
|
||||
0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
|
||||
0x3c843a59ac016b4b, 0x3fef7321f301b460,
|
||||
0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
|
||||
0xbc892ab93b470dc9, 0x3fef864614f5a129,
|
||||
0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
|
||||
0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
|
||||
0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
|
||||
0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
|
||||
0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
|
||||
0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
|
||||
0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
|
||||
0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
|
||||
0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
|
||||
0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
|
||||
},
|
||||
};
|
||||
26
be/src/glibc-compatibility/musl/exp_data.h
Normal file
26
be/src/glibc-compatibility/musl/exp_data.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _EXP_DATA_H
|
||||
#define _EXP_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
#include <stdint.h>
|
||||
|
||||
#define EXP_TABLE_BITS 7
|
||||
#define EXP_POLY_ORDER 5
|
||||
#define EXP_USE_TOINT_NARROW 0
|
||||
#define EXP2_POLY_ORDER 5
|
||||
extern hidden const struct exp_data {
|
||||
double invln2N;
|
||||
double shift;
|
||||
double negln2hiN;
|
||||
double negln2loN;
|
||||
double poly[4]; /* Last four coefficients. */
|
||||
double exp2_shift;
|
||||
double exp2_poly[EXP2_POLY_ORDER];
|
||||
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
|
||||
} __exp_data;
|
||||
|
||||
#endif
|
||||
10
be/src/glibc-compatibility/musl/fallocate.c
Normal file
10
be/src/glibc-compatibility/musl/fallocate.c
Normal file
@ -0,0 +1,10 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
extern long int syscall (long int __sysno, ...) __THROW;
|
||||
|
||||
int fallocate(int fd, int mode, off_t base, off_t len)
|
||||
{
|
||||
return syscall(SYS_fallocate, fd, mode, base, len);
|
||||
}
|
||||
61
be/src/glibc-compatibility/musl/fcntl.c
Normal file
61
be/src/glibc-compatibility/musl/fcntl.c
Normal file
@ -0,0 +1,61 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <syscall.h>
|
||||
#include "syscall.h"
|
||||
|
||||
#include <features.h>
|
||||
#if defined(__GLIBC__) && __GLIBC__ >= 2
|
||||
# define GLIBC_MINOR __GLIBC_MINOR__
|
||||
#elif defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ >= 2
|
||||
# define GLIBC_MINOR __GNU_LIBRARY_MINOR__
|
||||
#endif
|
||||
|
||||
#if defined(GLIBC_MINOR) && GLIBC_MINOR >= 28
|
||||
|
||||
int fcntl64(int fd, int cmd, ...)
|
||||
{
|
||||
unsigned long arg;
|
||||
va_list ap;
|
||||
va_start(ap, cmd);
|
||||
arg = va_arg(ap, unsigned long);
|
||||
va_end(ap);
|
||||
if (cmd == F_SETFL) arg |= O_LARGEFILE;
|
||||
if (cmd == F_SETLKW) return syscall(SYS_fcntl, fd, cmd, (void *)arg);
|
||||
if (cmd == F_GETOWN) {
|
||||
struct f_owner_ex ex;
|
||||
int ret = __syscall(SYS_fcntl, fd, F_GETOWN_EX, &ex);
|
||||
if (ret == -EINVAL) return __syscall(SYS_fcntl, fd, cmd, (void *)arg);
|
||||
if (ret) return __syscall_ret(ret);
|
||||
return ex.type == F_OWNER_PGRP ? -ex.pid : ex.pid;
|
||||
}
|
||||
if (cmd == F_DUPFD_CLOEXEC) {
|
||||
int ret = __syscall(SYS_fcntl, fd, F_DUPFD_CLOEXEC, arg);
|
||||
if (ret != -EINVAL) {
|
||||
if (ret >= 0)
|
||||
__syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
|
||||
return __syscall_ret(ret);
|
||||
}
|
||||
ret = __syscall(SYS_fcntl, fd, F_DUPFD_CLOEXEC, 0);
|
||||
if (ret != -EINVAL) {
|
||||
if (ret >= 0) __syscall(SYS_close, ret);
|
||||
return __syscall_ret(-EINVAL);
|
||||
}
|
||||
ret = __syscall(SYS_fcntl, fd, F_DUPFD, arg);
|
||||
if (ret >= 0) __syscall(SYS_fcntl, ret, F_SETFD, FD_CLOEXEC);
|
||||
return __syscall_ret(ret);
|
||||
}
|
||||
switch (cmd) {
|
||||
case F_SETLK:
|
||||
case F_GETLK:
|
||||
case F_GETOWN_EX:
|
||||
case F_SETOWN_EX:
|
||||
return syscall(SYS_fcntl, fd, cmd, (void *)arg);
|
||||
default:
|
||||
return syscall(SYS_fcntl, fd, cmd, arg);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
38
be/src/glibc-compatibility/musl/futimens.c
Normal file
38
be/src/glibc-compatibility/musl/futimens.c
Normal file
@ -0,0 +1,38 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
#include <syscall.h>
|
||||
|
||||
int futimens(int fd, const struct timespec times[2])
|
||||
{
|
||||
int r = __syscall(SYS_utimensat, fd, 0, times, 0);
|
||||
#ifdef SYS_futimesat
|
||||
if (r != -ENOSYS) return __syscall_ret(r);
|
||||
struct timeval *tv = 0, tmp[2];
|
||||
if (times) {
|
||||
int i;
|
||||
tv = tmp;
|
||||
for (i=0; i<2; i++) {
|
||||
if (times[i].tv_nsec >= 1000000000ULL) {
|
||||
if (times[i].tv_nsec == UTIME_NOW &&
|
||||
times[1-i].tv_nsec == UTIME_NOW) {
|
||||
tv = 0;
|
||||
break;
|
||||
}
|
||||
if (times[i].tv_nsec == UTIME_OMIT)
|
||||
return __syscall_ret(-ENOSYS);
|
||||
return __syscall_ret(-EINVAL);
|
||||
}
|
||||
tmp[i].tv_sec = times[i].tv_sec;
|
||||
tmp[i].tv_usec = times[i].tv_nsec / 1000;
|
||||
}
|
||||
}
|
||||
|
||||
r = __syscall(SYS_futimesat, fd, 0, tv);
|
||||
if (r != -ENOSYS || fd != AT_FDCWD) return __syscall_ret(r);
|
||||
r = __syscall(SYS_utimes, 0, tv);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
33
be/src/glibc-compatibility/musl/getentropy.c
Normal file
33
be/src/glibc-compatibility/musl/getentropy.c
Normal file
@ -0,0 +1,33 @@
|
||||
#define _DEFAULT_SOURCE
|
||||
#include <unistd.h>
|
||||
#include <sys/random.h>
|
||||
#include <pthread.h>
|
||||
#include <errno.h>
|
||||
|
||||
int getentropy(void *buffer, size_t len)
|
||||
{
|
||||
int cs, ret = 0;
|
||||
char *pos = buffer;
|
||||
|
||||
if (len > 256) {
|
||||
errno = EIO;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
|
||||
|
||||
while (len) {
|
||||
ret = getrandom(pos, len, 0);
|
||||
if (ret < 0) {
|
||||
if (errno == EINTR) continue;
|
||||
else break;
|
||||
}
|
||||
pos += ret;
|
||||
len -= ret;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
pthread_setcancelstate(cs, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
23
be/src/glibc-compatibility/musl/getrandom.c
Normal file
23
be/src/glibc-compatibility/musl/getrandom.c
Normal file
@ -0,0 +1,23 @@
|
||||
/** We have to replace glibc getrandom only when glibc version is higher than 2.25.
|
||||
* In previous versions of glibc this function doesn't exist
|
||||
* and old kernels may be missing SYS_getrandom syscall.
|
||||
*/
|
||||
#include <features.h>
|
||||
#if defined(__GLIBC__) && __GLIBC__ >= 2
|
||||
# define GLIBC_MINOR __GLIBC_MINOR__
|
||||
#elif defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ >= 2
|
||||
# define GLIBC_MINOR __GNU_LIBRARY_MINOR__
|
||||
#endif
|
||||
|
||||
#if defined(GLIBC_MINOR) && GLIBC_MINOR >= 25
|
||||
|
||||
#include <unistd.h>
|
||||
#include <syscall.h>
|
||||
#include "syscall.h"
|
||||
|
||||
ssize_t getrandom(void *buf, size_t buflen, unsigned flags)
|
||||
{
|
||||
/// There was cancellable syscall (syscall_cp), but I don't care too.
|
||||
return syscall(SYS_getrandom, buf, buflen, flags);
|
||||
}
|
||||
#endif
|
||||
240
be/src/glibc-compatibility/musl/glob.c
Normal file
240
be/src/glibc-compatibility/musl/glob.c
Normal file
@ -0,0 +1,240 @@
|
||||
#include <glob.h>
|
||||
#include <fnmatch.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
|
||||
struct match
|
||||
{
|
||||
struct match *next;
|
||||
char name[1];
|
||||
};
|
||||
|
||||
static int is_literal(const char *p, int useesc)
|
||||
{
|
||||
int bracket = 0;
|
||||
for (; *p; p++) {
|
||||
switch (*p) {
|
||||
case '\\':
|
||||
if (!useesc) break;
|
||||
case '?':
|
||||
case '*':
|
||||
return 0;
|
||||
case '[':
|
||||
bracket = 1;
|
||||
break;
|
||||
case ']':
|
||||
if (bracket) return 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int append(struct match **tail, const char *name, size_t len, int mark)
|
||||
{
|
||||
struct match *new = malloc(sizeof(struct match) + len + 1);
|
||||
if (!new) return -1;
|
||||
(*tail)->next = new;
|
||||
new->next = NULL;
|
||||
strcpy(new->name, name);
|
||||
if (mark) strcat(new->name, "/");
|
||||
*tail = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int match_in_dir(const char *d, const char *p, int flags, int (*errfunc)(const char *path, int err), struct match **tail)
|
||||
{
|
||||
DIR *dir;
|
||||
struct dirent *de;
|
||||
char pat[strlen(p)+1];
|
||||
char *p2;
|
||||
size_t l = strlen(d);
|
||||
int literal;
|
||||
int fnm_flags= ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0)
|
||||
| ((!(flags & GLOB_PERIOD)) ? FNM_PERIOD : 0);
|
||||
int error;
|
||||
|
||||
if ((p2 = strchr(p, '/'))) {
|
||||
strcpy(pat, p);
|
||||
pat[p2-p] = 0;
|
||||
for (; *p2 == '/'; p2++);
|
||||
p = pat;
|
||||
}
|
||||
literal = is_literal(p, !(flags & GLOB_NOESCAPE));
|
||||
if (*d == '/' && !*(d+1)) l = 0;
|
||||
|
||||
/* rely on opendir failing for nondirectory objects */
|
||||
dir = opendir(*d ? d : ".");
|
||||
error = errno;
|
||||
if (!dir) {
|
||||
/* this is not an error -- we let opendir call stat for us */
|
||||
if (error == ENOTDIR) return 0;
|
||||
if (error == EACCES && !*p) {
|
||||
struct stat st;
|
||||
if (!stat(d, &st) && S_ISDIR(st.st_mode)) {
|
||||
if (append(tail, d, l, l))
|
||||
return GLOB_NOSPACE;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (errfunc(d, error) || (flags & GLOB_ERR))
|
||||
return GLOB_ABORTED;
|
||||
return 0;
|
||||
}
|
||||
if (!*p) {
|
||||
error = append(tail, d, l, l) ? GLOB_NOSPACE : 0;
|
||||
closedir(dir);
|
||||
return error;
|
||||
}
|
||||
while ((de = readdir(dir))) {
|
||||
char namebuf[l+de->d_reclen+2], *name = namebuf;
|
||||
if (!literal && fnmatch(p, de->d_name, fnm_flags))
|
||||
continue;
|
||||
if (literal && strcmp(p, de->d_name))
|
||||
continue;
|
||||
if (p2 && de->d_type && !S_ISDIR(de->d_type<<12) && !S_ISLNK(de->d_type<<12))
|
||||
continue;
|
||||
/* With GLOB_PERIOD, don't allow matching . or .. unless
|
||||
* fnmatch would match them with FNM_PERIOD rules in effect. */
|
||||
if (p2 && (flags & GLOB_PERIOD) && de->d_name[0]=='.'
|
||||
&& (!de->d_name[1] || (de->d_name[1]=='.' && !de->d_name[2]))
|
||||
&& fnmatch(p, de->d_name, fnm_flags | FNM_PERIOD))
|
||||
continue;
|
||||
if (*d) {
|
||||
memcpy(name, d, l);
|
||||
name[l] = '/';
|
||||
strcpy(name+l+1, de->d_name);
|
||||
} else {
|
||||
name = de->d_name;
|
||||
}
|
||||
if (p2) {
|
||||
if ((error = match_in_dir(name, p2, flags, errfunc, tail))) {
|
||||
closedir(dir);
|
||||
return error;
|
||||
}
|
||||
} else {
|
||||
int mark = 0;
|
||||
if (flags & GLOB_MARK) {
|
||||
if (de->d_type && !S_ISLNK(de->d_type<<12))
|
||||
mark = S_ISDIR(de->d_type<<12);
|
||||
else {
|
||||
struct stat st;
|
||||
stat(name, &st);
|
||||
mark = S_ISDIR(st.st_mode);
|
||||
}
|
||||
}
|
||||
if (append(tail, name, l+de->d_reclen+1, mark)) {
|
||||
closedir(dir);
|
||||
return GLOB_NOSPACE;
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
if (error && (errfunc(d, error) || (flags & GLOB_ERR)))
|
||||
return GLOB_ABORTED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ignore_err(const char *path, int err)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void freelist(struct match *head)
|
||||
{
|
||||
struct match *match, *next;
|
||||
for (match=head->next; match; match=next) {
|
||||
next = match->next;
|
||||
free(match);
|
||||
}
|
||||
}
|
||||
|
||||
static int sort(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(*(const char **)a, *(const char **)b);
|
||||
}
|
||||
|
||||
int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g)
|
||||
{
|
||||
const char *p=pat, *d;
|
||||
struct match head = { .next = NULL }, *tail = &head;
|
||||
size_t cnt, i;
|
||||
size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0;
|
||||
int error = 0;
|
||||
|
||||
if (*p == '/') {
|
||||
for (; *p == '/'; p++);
|
||||
d = "/";
|
||||
} else {
|
||||
d = "";
|
||||
}
|
||||
|
||||
if (!errfunc) errfunc = ignore_err;
|
||||
|
||||
if (!(flags & GLOB_APPEND)) {
|
||||
g->gl_offs = offs;
|
||||
g->gl_pathc = 0;
|
||||
g->gl_pathv = NULL;
|
||||
}
|
||||
|
||||
if (strnlen(p, PATH_MAX+1) > PATH_MAX) return GLOB_NOSPACE;
|
||||
|
||||
if (*pat) error = match_in_dir(d, p, flags, errfunc, &tail);
|
||||
if (error == GLOB_NOSPACE) {
|
||||
freelist(&head);
|
||||
return error;
|
||||
}
|
||||
|
||||
for (cnt=0, tail=head.next; tail; tail=tail->next, cnt++);
|
||||
if (!cnt) {
|
||||
if (flags & GLOB_NOCHECK) {
|
||||
tail = &head;
|
||||
if (append(&tail, pat, strlen(pat), 0))
|
||||
return GLOB_NOSPACE;
|
||||
cnt++;
|
||||
} else
|
||||
return GLOB_NOMATCH;
|
||||
}
|
||||
|
||||
if (flags & GLOB_APPEND) {
|
||||
char **pathv = realloc(g->gl_pathv, (offs + g->gl_pathc + cnt + 1) * sizeof(char *));
|
||||
if (!pathv) {
|
||||
freelist(&head);
|
||||
return GLOB_NOSPACE;
|
||||
}
|
||||
g->gl_pathv = pathv;
|
||||
offs += g->gl_pathc;
|
||||
} else {
|
||||
g->gl_pathv = malloc((offs + cnt + 1) * sizeof(char *));
|
||||
if (!g->gl_pathv) {
|
||||
freelist(&head);
|
||||
return GLOB_NOSPACE;
|
||||
}
|
||||
for (i=0; i<offs; i++)
|
||||
g->gl_pathv[i] = NULL;
|
||||
}
|
||||
for (i=0, tail=head.next; i<cnt; tail=tail->next, i++)
|
||||
g->gl_pathv[offs + i] = tail->name;
|
||||
g->gl_pathv[offs + i] = NULL;
|
||||
g->gl_pathc += cnt;
|
||||
|
||||
if (!(flags & GLOB_NOSORT))
|
||||
qsort(g->gl_pathv+offs, cnt, sizeof(char *), sort);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
void globfree(glob_t *g)
|
||||
{
|
||||
size_t i;
|
||||
for (i=0; i<g->gl_pathc; i++)
|
||||
free(g->gl_pathv[g->gl_offs + i] - offsetof(struct match, name));
|
||||
free(g->gl_pathv);
|
||||
g->gl_pathc = 0;
|
||||
g->gl_pathv = NULL;
|
||||
}
|
||||
268
be/src/glibc-compatibility/musl/lgamma.c
Normal file
268
be/src/glibc-compatibility/musl/lgamma.c
Normal file
@ -0,0 +1,268 @@
|
||||
/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*
|
||||
*/
|
||||
/* lgamma_r(x, signgamp)
|
||||
* Reentrant version of the logarithm of the Gamma function
|
||||
* with user provide pointer for the sign of Gamma(x).
|
||||
*
|
||||
* Method:
|
||||
* 1. Argument Reduction for 0 < x <= 8
|
||||
* Since gamma(1+s)=s*gamma(s), for x in [0,8], we may
|
||||
* reduce x to a number in [1.5,2.5] by
|
||||
* lgamma(1+s) = log(s) + lgamma(s)
|
||||
* for example,
|
||||
* lgamma(7.3) = log(6.3) + lgamma(6.3)
|
||||
* = log(6.3*5.3) + lgamma(5.3)
|
||||
* = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3)
|
||||
* 2. Polynomial approximation of lgamma around its
|
||||
* minimun ymin=1.461632144968362245 to maintain monotonicity.
|
||||
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use
|
||||
* Let z = x-ymin;
|
||||
* lgamma(x) = -1.214862905358496078218 + z^2*poly(z)
|
||||
* where
|
||||
* poly(z) is a 14 degree polynomial.
|
||||
* 2. Rational approximation in the primary interval [2,3]
|
||||
* We use the following approximation:
|
||||
* s = x-2.0;
|
||||
* lgamma(x) = 0.5*s + s*P(s)/Q(s)
|
||||
* with accuracy
|
||||
* |P/Q - (lgamma(x)-0.5s)| < 2**-61.71
|
||||
* Our algorithms are based on the following observation
|
||||
*
|
||||
* zeta(2)-1 2 zeta(3)-1 3
|
||||
* lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ...
|
||||
* 2 3
|
||||
*
|
||||
* where Euler = 0.5771... is the Euler constant, which is very
|
||||
* close to 0.5.
|
||||
*
|
||||
* 3. For x>=8, we have
|
||||
* lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+....
|
||||
* (better formula:
|
||||
* lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...)
|
||||
* Let z = 1/x, then we approximation
|
||||
* f(z) = lgamma(x) - (x-0.5)(log(x)-1)
|
||||
* by
|
||||
* 3 5 11
|
||||
* w = w0 + w1*z + w2*z + w3*z + ... + w6*z
|
||||
* where
|
||||
* |w - f(z)| < 2**-58.74
|
||||
*
|
||||
* 4. For negative x, since (G is gamma function)
|
||||
* -x*G(-x)*G(x) = pi/sin(pi*x),
|
||||
* we have
|
||||
* G(x) = pi/(sin(pi*x)*(-x)*G(-x))
|
||||
* since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
|
||||
* Hence, for x<0, signgam = sign(sin(pi*x)) and
|
||||
* lgamma(x) = log(|Gamma(x)|)
|
||||
* = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
|
||||
* Note: one should avoid compute pi*(-x) directly in the
|
||||
* computation of sin(pi*(-x)).
|
||||
*
|
||||
* 5. Special Cases
|
||||
* lgamma(2+s) ~ s*(1-Euler) for tiny s
|
||||
* lgamma(1) = lgamma(2) = 0
|
||||
* lgamma(x) ~ -log(|x|) for tiny x
|
||||
* lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero
|
||||
* lgamma(inf) = inf
|
||||
* lgamma(-inf) = inf (bug for bug compatible with C99!?)
|
||||
*
|
||||
*/
|
||||
|
||||
static const double
|
||||
pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */
|
||||
a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */
|
||||
a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */
|
||||
a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */
|
||||
a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */
|
||||
a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */
|
||||
a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */
|
||||
a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */
|
||||
a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */
|
||||
a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */
|
||||
a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */
|
||||
a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */
|
||||
a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */
|
||||
tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */
|
||||
tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */
|
||||
/* tt = -(tail of tf) */
|
||||
tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */
|
||||
t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */
|
||||
t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */
|
||||
t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */
|
||||
t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */
|
||||
t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */
|
||||
t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */
|
||||
t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */
|
||||
t7 = -3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */
|
||||
t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */
|
||||
t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */
|
||||
t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */
|
||||
t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */
|
||||
t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */
|
||||
t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */
|
||||
t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */
|
||||
u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
|
||||
u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */
|
||||
u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */
|
||||
u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */
|
||||
u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */
|
||||
u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */
|
||||
v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */
|
||||
v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */
|
||||
v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */
|
||||
v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */
|
||||
v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */
|
||||
s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */
|
||||
s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */
|
||||
s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */
|
||||
s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */
|
||||
s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */
|
||||
s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */
|
||||
s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */
|
||||
r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */
|
||||
r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */
|
||||
r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */
|
||||
r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */
|
||||
r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */
|
||||
r6 = 7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */
|
||||
w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */
|
||||
w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */
|
||||
w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */
|
||||
w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */
|
||||
w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */
|
||||
w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */
|
||||
w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */
|
||||
|
||||
#include <stdint.h>
|
||||
#include <math.h>
|
||||
|
||||
double lgamma_r(double x, int *signgamp)
|
||||
{
|
||||
union {double f; uint64_t i;} u = {x};
|
||||
double_t t,y,z,nadj=0,p,p1,p2,p3,q,r,w;
|
||||
uint32_t ix;
|
||||
int sign,i;
|
||||
|
||||
/* purge off +-inf, NaN, +-0, tiny and negative arguments */
|
||||
*signgamp = 1;
|
||||
sign = u.i>>63;
|
||||
ix = u.i>>32 & 0x7fffffff;
|
||||
if (ix >= 0x7ff00000)
|
||||
return x*x;
|
||||
if (ix < (0x3ff-70)<<20) { /* |x|<2**-70, return -log(|x|) */
|
||||
if(sign) {
|
||||
x = -x;
|
||||
*signgamp = -1;
|
||||
}
|
||||
return -log(x);
|
||||
}
|
||||
if (sign) {
|
||||
x = -x;
|
||||
t = sin(pi * x);
|
||||
if (t == 0.0) /* -integer */
|
||||
return 1.0/(x-x);
|
||||
if (t > 0.0)
|
||||
*signgamp = -1;
|
||||
else
|
||||
t = -t;
|
||||
nadj = log(pi/(t*x));
|
||||
}
|
||||
|
||||
/* purge off 1 and 2 */
|
||||
if ((ix == 0x3ff00000 || ix == 0x40000000) && (uint32_t)u.i == 0)
|
||||
r = 0;
|
||||
/* for x < 2.0 */
|
||||
else if (ix < 0x40000000) {
|
||||
if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */
|
||||
r = -log(x);
|
||||
if (ix >= 0x3FE76944) {
|
||||
y = 1.0 - x;
|
||||
i = 0;
|
||||
} else if (ix >= 0x3FCDA661) {
|
||||
y = x - (tc-1.0);
|
||||
i = 1;
|
||||
} else {
|
||||
y = x;
|
||||
i = 2;
|
||||
}
|
||||
} else {
|
||||
r = 0.0;
|
||||
if (ix >= 0x3FFBB4C3) { /* [1.7316,2] */
|
||||
y = 2.0 - x;
|
||||
i = 0;
|
||||
} else if(ix >= 0x3FF3B4C4) { /* [1.23,1.73] */
|
||||
y = x - tc;
|
||||
i = 1;
|
||||
} else {
|
||||
y = x - 1.0;
|
||||
i = 2;
|
||||
}
|
||||
}
|
||||
switch (i) {
|
||||
case 0:
|
||||
z = y*y;
|
||||
p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10))));
|
||||
p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11)))));
|
||||
p = y*p1+p2;
|
||||
r += (p-0.5*y);
|
||||
break;
|
||||
case 1:
|
||||
z = y*y;
|
||||
w = z*y;
|
||||
p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */
|
||||
p2 = t1+w*(t4+w*(t7+w*(t10+w*t13)));
|
||||
p3 = t2+w*(t5+w*(t8+w*(t11+w*t14)));
|
||||
p = z*p1-(tt-w*(p2+y*p3));
|
||||
r += tf + p;
|
||||
break;
|
||||
case 2:
|
||||
p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5)))));
|
||||
p2 = 1.0+y*(v1+y*(v2+y*(v3+y*(v4+y*v5))));
|
||||
r += -0.5*y + p1/p2;
|
||||
}
|
||||
} else if (ix < 0x40200000) { /* x < 8.0 */
|
||||
i = (int)x;
|
||||
y = x - (double)i;
|
||||
p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6))))));
|
||||
q = 1.0+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6)))));
|
||||
r = 0.5*y+p/q;
|
||||
z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */
|
||||
switch (i) {
|
||||
case 7: z *= y + 6.0; /* FALLTHRU */
|
||||
case 6: z *= y + 5.0; /* FALLTHRU */
|
||||
case 5: z *= y + 4.0; /* FALLTHRU */
|
||||
case 4: z *= y + 3.0; /* FALLTHRU */
|
||||
case 3: z *= y + 2.0; /* FALLTHRU */
|
||||
r += log(z);
|
||||
break;
|
||||
}
|
||||
} else if (ix < 0x43900000) { /* 8.0 <= x < 2**58 */
|
||||
t = log(x);
|
||||
z = 1.0/x;
|
||||
y = z*z;
|
||||
w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6)))));
|
||||
r = (x-0.5)*(t-1.0)+w;
|
||||
} else /* 2**58 <= x <= inf */
|
||||
r = x*(log(x)-1.0);
|
||||
if (sign)
|
||||
r = nadj - r;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
int signgam;
|
||||
|
||||
double lgamma(double x)
|
||||
{
|
||||
return lgamma_r(x, &signgam);
|
||||
}
|
||||
249
be/src/glibc-compatibility/musl/libm.h
Normal file
249
be/src/glibc-compatibility/musl/libm.h
Normal file
@ -0,0 +1,249 @@
|
||||
#ifndef _LIBM_H
|
||||
#define _LIBM_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <endian.h>
|
||||
#include "musl_features.h"
|
||||
|
||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
union ldshape {
|
||||
long double f;
|
||||
struct {
|
||||
uint64_t m;
|
||||
uint16_t se;
|
||||
} i;
|
||||
};
|
||||
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
|
||||
/* This is the m68k variant of 80-bit long double, and this definition only works
|
||||
* on archs where the alignment requirement of uint64_t is <= 4. */
|
||||
union ldshape {
|
||||
long double f;
|
||||
struct {
|
||||
uint16_t se;
|
||||
uint16_t pad;
|
||||
uint64_t m;
|
||||
} i;
|
||||
};
|
||||
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
union ldshape {
|
||||
long double f;
|
||||
struct {
|
||||
uint64_t lo;
|
||||
uint32_t mid;
|
||||
uint16_t top;
|
||||
uint16_t se;
|
||||
} i;
|
||||
struct {
|
||||
uint64_t lo;
|
||||
uint64_t hi;
|
||||
} i2;
|
||||
};
|
||||
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
|
||||
union ldshape {
|
||||
long double f;
|
||||
struct {
|
||||
uint16_t se;
|
||||
uint16_t top;
|
||||
uint32_t mid;
|
||||
uint64_t lo;
|
||||
} i;
|
||||
struct {
|
||||
uint64_t hi;
|
||||
uint64_t lo;
|
||||
} i2;
|
||||
};
|
||||
#else
|
||||
#error Unsupported long double representation
|
||||
#endif
|
||||
|
||||
/* Support non-nearest rounding mode. */
|
||||
#define WANT_ROUNDING 1
|
||||
/* Support signaling NaNs. */
|
||||
#define WANT_SNAN 0
|
||||
|
||||
#if WANT_SNAN
|
||||
#error SNaN is unsupported
|
||||
#else
|
||||
#define issignalingf_inline(x) 0
|
||||
#define issignaling_inline(x) 0
|
||||
#endif
|
||||
|
||||
#ifndef TOINT_INTRINSICS
|
||||
#define TOINT_INTRINSICS 0
|
||||
#endif
|
||||
|
||||
#if TOINT_INTRINSICS
|
||||
/* Round x to nearest int in all rounding modes, ties have to be rounded
|
||||
consistently with converttoint so the results match. If the result
|
||||
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
|
||||
static double_t roundtoint(double_t);
|
||||
|
||||
/* Convert x to nearest int in all rounding modes, ties have to be rounded
|
||||
consistently with roundtoint. If the result is not representible in an
|
||||
int32_t then the semantics is unspecified. */
|
||||
static int32_t converttoint(double_t);
|
||||
#endif
|
||||
|
||||
/* Helps static branch prediction so hot path can be better optimized. */
|
||||
#ifdef __GNUC__
|
||||
#define predict_true(x) __builtin_expect(!!(x), 1)
|
||||
#define predict_false(x) __builtin_expect(x, 0)
|
||||
#else
|
||||
#define predict_true(x) (x)
|
||||
#define predict_false(x) (x)
|
||||
#endif
|
||||
|
||||
/* Evaluate an expression as the specified type. With standard excess
|
||||
precision handling a type cast or assignment is enough (with
|
||||
-ffloat-store an assignment is required, in old compilers argument
|
||||
passing and return statement may not drop excess precision). */
|
||||
|
||||
static inline float eval_as_float(float x)
|
||||
{
|
||||
float y = x;
|
||||
return y;
|
||||
}
|
||||
|
||||
static inline double eval_as_double(double x)
|
||||
{
|
||||
double y = x;
|
||||
return y;
|
||||
}
|
||||
|
||||
/* fp_barrier returns its input, but limits code transformations
|
||||
as if it had a side-effect (e.g. observable io) and returned
|
||||
an arbitrary value. */
|
||||
|
||||
#ifndef fp_barrierf
|
||||
#define fp_barrierf fp_barrierf
|
||||
static inline float fp_barrierf(float x)
|
||||
{
|
||||
volatile float y = x;
|
||||
return y;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef fp_barrier
|
||||
#define fp_barrier fp_barrier
|
||||
static inline double fp_barrier(double x)
|
||||
{
|
||||
volatile double y = x;
|
||||
return y;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef fp_barrierl
|
||||
#define fp_barrierl fp_barrierl
|
||||
static inline long double fp_barrierl(long double x)
|
||||
{
|
||||
volatile long double y = x;
|
||||
return y;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* fp_force_eval ensures that the input value is computed when that's
|
||||
otherwise unused. To prevent the constant folding of the input
|
||||
expression, an additional fp_barrier may be needed or a compilation
|
||||
mode that does so (e.g. -frounding-math in gcc). Then it can be
|
||||
used to evaluate an expression for its fenv side-effects only. */
|
||||
|
||||
#ifndef fp_force_evalf
|
||||
#define fp_force_evalf fp_force_evalf
|
||||
static inline void fp_force_evalf(float x)
|
||||
{
|
||||
volatile float y;
|
||||
y = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef fp_force_eval
|
||||
#define fp_force_eval fp_force_eval
|
||||
static inline void fp_force_eval(double x)
|
||||
{
|
||||
volatile double y;
|
||||
y = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef fp_force_evall
|
||||
#define fp_force_evall fp_force_evall
|
||||
static inline void fp_force_evall(long double x)
|
||||
{
|
||||
volatile long double y;
|
||||
y = x;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define FORCE_EVAL(x) do { \
|
||||
if (sizeof(x) == sizeof(float)) { \
|
||||
fp_force_evalf(x); \
|
||||
} else if (sizeof(x) == sizeof(double)) { \
|
||||
fp_force_eval(x); \
|
||||
} else { \
|
||||
fp_force_evall(x); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
|
||||
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
|
||||
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
|
||||
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
|
||||
|
||||
#define EXTRACT_WORDS(hi,lo,d) \
|
||||
do { \
|
||||
uint64_t __u = asuint64(d); \
|
||||
(hi) = __u >> 32; \
|
||||
(lo) = (uint32_t)__u; \
|
||||
} while (0)
|
||||
|
||||
#define GET_HIGH_WORD(hi,d) \
|
||||
do { \
|
||||
(hi) = asuint64(d) >> 32; \
|
||||
} while (0)
|
||||
|
||||
#define GET_LOW_WORD(lo,d) \
|
||||
do { \
|
||||
(lo) = (uint32_t)asuint64(d); \
|
||||
} while (0)
|
||||
|
||||
#define INSERT_WORDS(d,hi,lo) \
|
||||
do { \
|
||||
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
|
||||
} while (0)
|
||||
|
||||
#define SET_HIGH_WORD(d,hi) \
|
||||
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
|
||||
|
||||
#define SET_LOW_WORD(d,lo) \
|
||||
INSERT_WORDS(d, asuint64(d)>>32, lo)
|
||||
|
||||
#define GET_FLOAT_WORD(w,d) \
|
||||
do { \
|
||||
(w) = asuint(d); \
|
||||
} while (0)
|
||||
|
||||
#define SET_FLOAT_WORD(d,w) \
|
||||
do { \
|
||||
(d) = asfloat(w); \
|
||||
} while (0)
|
||||
|
||||
extern int __signgam;
|
||||
hidden double __lgamma_r(double, int *);
|
||||
hidden float __lgammaf_r(float, int *);
|
||||
|
||||
/* error handling functions */
|
||||
hidden float __math_xflowf(uint32_t, float);
|
||||
hidden float __math_uflowf(uint32_t);
|
||||
hidden float __math_oflowf(uint32_t);
|
||||
hidden float __math_divzerof(uint32_t);
|
||||
hidden float __math_invalidf(float);
|
||||
hidden double __math_xflow(uint32_t, double);
|
||||
hidden double __math_uflow(uint32_t);
|
||||
hidden double __math_oflow(uint32_t);
|
||||
hidden double __math_divzero(uint32_t);
|
||||
hidden double __math_invalid(double);
|
||||
|
||||
#endif
|
||||
112
be/src/glibc-compatibility/musl/log.c
Normal file
112
be/src/glibc-compatibility/musl/log.c
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Double-precision log(x) function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "log_data.h"
|
||||
|
||||
#define T __log_data.tab
|
||||
#define T2 __log_data.tab2
|
||||
#define B __log_data.poly1
|
||||
#define A __log_data.poly
|
||||
#define Ln2hi __log_data.ln2hi
|
||||
#define Ln2lo __log_data.ln2lo
|
||||
#define N (1 << LOG_TABLE_BITS)
|
||||
#define OFF 0x3fe6000000000000
|
||||
|
||||
/* Top 16 bits of a double. */
|
||||
static inline uint32_t top16(double x)
|
||||
{
|
||||
return asuint64(x) >> 48;
|
||||
}
|
||||
|
||||
double log(double x)
|
||||
{
|
||||
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
|
||||
uint64_t ix, iz, tmp;
|
||||
uint32_t top;
|
||||
int k, i;
|
||||
|
||||
ix = asuint64(x);
|
||||
top = top16(x);
|
||||
#define LO asuint64(1.0 - 0x1p-4)
|
||||
#define HI asuint64(1.0 + 0x1.09p-4)
|
||||
if (predict_false(ix - LO < HI - LO)) {
|
||||
/* Handle close to 1.0 inputs separately. */
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
|
||||
return 0;
|
||||
r = x - 1.0;
|
||||
r2 = r * r;
|
||||
r3 = r * r2;
|
||||
y = r3 *
|
||||
(B[1] + r * B[2] + r2 * B[3] +
|
||||
r3 * (B[4] + r * B[5] + r2 * B[6] +
|
||||
r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
|
||||
/* Worst-case error is around 0.507 ULP. */
|
||||
w = r * 0x1p27;
|
||||
double_t rhi = r + w - w;
|
||||
double_t rlo = r - rhi;
|
||||
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
|
||||
hi = r + w;
|
||||
lo = r - hi + w;
|
||||
lo += B[0] * rlo * (rhi + r);
|
||||
y += lo;
|
||||
y += hi;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
|
||||
/* x < 0x1p-1022 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzero(1);
|
||||
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||
return __math_invalid(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint64(x * 0x1p52);
|
||||
ix -= 52ULL << 52;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
|
||||
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0xfffULL << 52);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = asdouble(iz);
|
||||
|
||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
|
||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||
#if __FP_FAST_FMA
|
||||
/* rounding error: 0x1p-55/N. */
|
||||
r = __builtin_fma(z, invc, -1.0);
|
||||
#else
|
||||
/* rounding error: 0x1p-55/N + 0x1p-66. */
|
||||
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||
#endif
|
||||
kd = (double_t)k;
|
||||
|
||||
/* hi + lo = r + log(c) + k*Ln2. */
|
||||
w = kd * Ln2hi + logc;
|
||||
hi = w + r;
|
||||
lo = w - hi + r + kd * Ln2lo;
|
||||
|
||||
/* log(x) = lo + (log1p(r) - r) + hi. */
|
||||
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||
/* Worst case error if |y| > 0x1p-5:
|
||||
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
|
||||
Worst case error if |y| > 0x1p-4:
|
||||
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
|
||||
y = lo + r2 * A[0] +
|
||||
r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
122
be/src/glibc-compatibility/musl/log2.c
Normal file
122
be/src/glibc-compatibility/musl/log2.c
Normal file
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Double-precision log2(x) function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "log2_data.h"
|
||||
|
||||
#define T __log2_data.tab
|
||||
#define T2 __log2_data.tab2
|
||||
#define B __log2_data.poly1
|
||||
#define A __log2_data.poly
|
||||
#define InvLn2hi __log2_data.invln2hi
|
||||
#define InvLn2lo __log2_data.invln2lo
|
||||
#define N (1 << LOG2_TABLE_BITS)
|
||||
#define OFF 0x3fe6000000000000
|
||||
|
||||
/* Top 16 bits of a double. */
|
||||
static inline uint32_t top16(double x)
|
||||
{
|
||||
return asuint64(x) >> 48;
|
||||
}
|
||||
|
||||
double log2(double x)
|
||||
{
|
||||
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
|
||||
uint64_t ix, iz, tmp;
|
||||
uint32_t top;
|
||||
int k, i;
|
||||
|
||||
ix = asuint64(x);
|
||||
top = top16(x);
|
||||
#define LO asuint64(1.0 - 0x1.5b51p-5)
|
||||
#define HI asuint64(1.0 + 0x1.6ab2p-5)
|
||||
if (predict_false(ix - LO < HI - LO)) {
|
||||
/* Handle close to 1.0 inputs separately. */
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && predict_false(ix == asuint64(1.0)))
|
||||
return 0;
|
||||
r = x - 1.0;
|
||||
#if __FP_FAST_FMA
|
||||
hi = r * InvLn2hi;
|
||||
lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
|
||||
#else
|
||||
double_t rhi, rlo;
|
||||
rhi = asdouble(asuint64(r) & -1ULL << 32);
|
||||
rlo = r - rhi;
|
||||
hi = rhi * InvLn2hi;
|
||||
lo = rlo * InvLn2hi + r * InvLn2lo;
|
||||
#endif
|
||||
r2 = r * r; /* rounding error: 0x1p-62. */
|
||||
r4 = r2 * r2;
|
||||
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
|
||||
p = r2 * (B[0] + r * B[1]);
|
||||
y = hi + p;
|
||||
lo += hi - y + p;
|
||||
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) +
|
||||
r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
|
||||
y += lo;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
|
||||
/* x < 0x1p-1022 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzero(1);
|
||||
if (ix == asuint64(INFINITY)) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
|
||||
return __math_invalid(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint64(x * 0x1p52);
|
||||
ix -= 52ULL << 52;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
|
||||
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0xfffULL << 52);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = asdouble(iz);
|
||||
kd = (double_t)k;
|
||||
|
||||
/* log2(x) = log2(z/c) + log2(c) + k. */
|
||||
/* r ~= z/c - 1, |r| < 1/(2*N). */
|
||||
#if __FP_FAST_FMA
|
||||
/* rounding error: 0x1p-55/N. */
|
||||
r = __builtin_fma(z, invc, -1.0);
|
||||
t1 = r * InvLn2hi;
|
||||
t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
|
||||
#else
|
||||
double_t rhi, rlo;
|
||||
/* rounding error: 0x1p-55/N + 0x1p-65. */
|
||||
r = (z - T2[i].chi - T2[i].clo) * invc;
|
||||
rhi = asdouble(asuint64(r) & -1ULL << 32);
|
||||
rlo = r - rhi;
|
||||
t1 = rhi * InvLn2hi;
|
||||
t2 = rlo * InvLn2hi + r * InvLn2lo;
|
||||
#endif
|
||||
|
||||
/* hi + lo = r/ln2 + log2(c) + k. */
|
||||
t3 = kd + logc;
|
||||
hi = t3 + t1;
|
||||
lo = t3 - hi + t1 + t2;
|
||||
|
||||
/* log2(r+1) = r/ln2 + r^2*poly(r). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
|
||||
r4 = r2 * r2;
|
||||
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
|
||||
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
|
||||
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
|
||||
y = lo + r2 * p + hi;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
201
be/src/glibc-compatibility/musl/log2_data.c
Normal file
201
be/src/glibc-compatibility/musl/log2_data.c
Normal file
@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Data for log2.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "log2_data.h"
|
||||
|
||||
#define N (1 << LOG2_TABLE_BITS)
|
||||
|
||||
const struct log2_data __log2_data = {
|
||||
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
|
||||
.invln2hi = 0x1.7154765200000p+0,
|
||||
.invln2lo = 0x1.705fc2eefa200p-33,
|
||||
.poly1 = {
|
||||
// relative error: 0x1.2fad8188p-63
|
||||
// in -0x1.5b51p-5 0x1.6ab2p-5
|
||||
-0x1.71547652b82fep-1,
|
||||
0x1.ec709dc3a03f7p-2,
|
||||
-0x1.71547652b7c3fp-2,
|
||||
0x1.2776c50f05be4p-2,
|
||||
-0x1.ec709dd768fe5p-3,
|
||||
0x1.a61761ec4e736p-3,
|
||||
-0x1.7153fbc64a79bp-3,
|
||||
0x1.484d154f01b4ap-3,
|
||||
-0x1.289e4a72c383cp-3,
|
||||
0x1.0b32f285aee66p-3,
|
||||
},
|
||||
.poly = {
|
||||
// relative error: 0x1.a72c2bf8p-58
|
||||
// abs error: 0x1.67a552c8p-66
|
||||
// in -0x1.f45p-8 0x1.f45p-8
|
||||
-0x1.71547652b8339p-1,
|
||||
0x1.ec709dc3a04bep-2,
|
||||
-0x1.7154764702ffbp-2,
|
||||
0x1.2776c50034c48p-2,
|
||||
-0x1.ec7b328ea92bcp-3,
|
||||
0x1.a6225e117f92ep-3,
|
||||
},
|
||||
/* Algorithm:
|
||||
|
||||
x = 2^k z
|
||||
log2(x) = k + log2(c) + log2(z/c)
|
||||
log2(z/c) = poly(z/c - 1)
|
||||
|
||||
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||
into the ith one, then table entries are computed as
|
||||
|
||||
tab[i].invc = 1/c
|
||||
tab[i].logc = (double)log2(c)
|
||||
tab2[i].chi = (double)c
|
||||
tab2[i].clo = (double)(c - (double)c)
|
||||
|
||||
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||
floating point invc candidates around 1/center and selecting one for which
|
||||
|
||||
1) the rounding error in 0x1.8p10 + logc is 0,
|
||||
2) the rounding error in z - chi - clo is < 0x1p-64 and
|
||||
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
|
||||
|
||||
Note: 1) ensures that k + logc can be computed without rounding error, 2)
|
||||
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
|
||||
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||
.tab = {
|
||||
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
|
||||
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
|
||||
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
|
||||
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
|
||||
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
|
||||
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
|
||||
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
|
||||
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
|
||||
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
|
||||
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
|
||||
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
|
||||
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
|
||||
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
|
||||
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
|
||||
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
|
||||
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
|
||||
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
|
||||
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
|
||||
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
|
||||
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
|
||||
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
|
||||
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
|
||||
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
|
||||
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
|
||||
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
|
||||
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
|
||||
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
|
||||
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
|
||||
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
|
||||
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
|
||||
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
|
||||
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
|
||||
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
|
||||
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
|
||||
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
|
||||
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
|
||||
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
|
||||
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
|
||||
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
|
||||
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
|
||||
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
|
||||
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
|
||||
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
|
||||
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
|
||||
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
|
||||
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
|
||||
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
|
||||
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
|
||||
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
|
||||
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
|
||||
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
|
||||
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
|
||||
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
|
||||
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
|
||||
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
|
||||
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
|
||||
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
|
||||
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
|
||||
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
|
||||
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
|
||||
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
|
||||
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
|
||||
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
|
||||
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
|
||||
},
|
||||
#if !__FP_FAST_FMA
|
||||
.tab2 = {
|
||||
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
|
||||
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
|
||||
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
|
||||
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
|
||||
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
|
||||
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
|
||||
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
|
||||
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
|
||||
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
|
||||
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
|
||||
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
|
||||
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
|
||||
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
|
||||
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
|
||||
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
|
||||
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
|
||||
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
|
||||
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
|
||||
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
|
||||
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
|
||||
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
|
||||
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
|
||||
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
|
||||
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
|
||||
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
|
||||
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
|
||||
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
|
||||
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
|
||||
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
|
||||
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
|
||||
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
|
||||
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
|
||||
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
|
||||
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
|
||||
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
|
||||
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
|
||||
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
|
||||
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
|
||||
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
|
||||
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
|
||||
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
|
||||
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
|
||||
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
|
||||
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
|
||||
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
|
||||
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
|
||||
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
|
||||
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
|
||||
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
|
||||
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
|
||||
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
|
||||
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
|
||||
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
|
||||
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
|
||||
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
|
||||
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
|
||||
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
|
||||
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
|
||||
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
|
||||
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
|
||||
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
|
||||
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
|
||||
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
|
||||
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
|
||||
},
|
||||
#endif
|
||||
};
|
||||
28
be/src/glibc-compatibility/musl/log2_data.h
Normal file
28
be/src/glibc-compatibility/musl/log2_data.h
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _LOG2_DATA_H
|
||||
#define _LOG2_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
|
||||
#define LOG2_TABLE_BITS 6
|
||||
#define LOG2_POLY_ORDER 7
|
||||
#define LOG2_POLY1_ORDER 11
|
||||
extern hidden const struct log2_data {
|
||||
double invln2hi;
|
||||
double invln2lo;
|
||||
double poly[LOG2_POLY_ORDER - 1];
|
||||
double poly1[LOG2_POLY1_ORDER - 1];
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << LOG2_TABLE_BITS];
|
||||
#if !__FP_FAST_FMA
|
||||
struct {
|
||||
double chi, clo;
|
||||
} tab2[1 << LOG2_TABLE_BITS];
|
||||
#endif
|
||||
} __log2_data;
|
||||
|
||||
#endif
|
||||
72
be/src/glibc-compatibility/musl/log2f.c
Normal file
72
be/src/glibc-compatibility/musl/log2f.c
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Single-precision log2 function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "log2f_data.h"
|
||||
|
||||
/*
|
||||
LOG2F_TABLE_BITS = 4
|
||||
LOG2F_POLY_ORDER = 4
|
||||
|
||||
ULP error: 0.752 (nearest rounding.)
|
||||
Relative error: 1.9 * 2^-26 (before rounding.)
|
||||
*/
|
||||
|
||||
#define N (1 << LOG2F_TABLE_BITS)
|
||||
#define T __log2f_data.tab
|
||||
#define A __log2f_data.poly
|
||||
#define OFF 0x3f330000
|
||||
|
||||
float log2f(float x)
|
||||
{
|
||||
double_t z, r, r2, p, y, y0, invc, logc;
|
||||
uint32_t ix, iz, top, tmp;
|
||||
int k, i;
|
||||
|
||||
ix = asuint(x);
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
|
||||
return 0;
|
||||
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzerof(1);
|
||||
if (ix == 0x7f800000) /* log2(inf) == inf. */
|
||||
return x;
|
||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||
return __math_invalidf(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint(x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
|
||||
top = tmp & 0xff800000;
|
||||
iz = ix - top;
|
||||
k = (int32_t)tmp >> 23; /* arithmetic shift */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t)asfloat(iz);
|
||||
|
||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t)k;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||
r2 = r * r;
|
||||
y = A[1] * r + A[2];
|
||||
y = A[0] * r2 + y;
|
||||
p = A[3] * r + y0;
|
||||
y = y * r2 + p;
|
||||
return eval_as_float(y);
|
||||
}
|
||||
33
be/src/glibc-compatibility/musl/log2f_data.c
Normal file
33
be/src/glibc-compatibility/musl/log2f_data.c
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Data definition for log2f.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "log2f_data.h"
|
||||
|
||||
const struct log2f_data __log2f_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
|
||||
{ 0x1p+0, 0x0p+0 },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
|
||||
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
|
||||
},
|
||||
.poly = {
|
||||
-0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
|
||||
0x1.715475f35c8b8p0,
|
||||
}
|
||||
};
|
||||
19
be/src/glibc-compatibility/musl/log2f_data.h
Normal file
19
be/src/glibc-compatibility/musl/log2f_data.h
Normal file
@ -0,0 +1,19 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _LOG2F_DATA_H
|
||||
#define _LOG2F_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
|
||||
#define LOG2F_TABLE_BITS 4
|
||||
#define LOG2F_POLY_ORDER 4
|
||||
extern hidden const struct log2f_data {
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << LOG2F_TABLE_BITS];
|
||||
double poly[LOG2F_POLY_ORDER];
|
||||
} __log2f_data;
|
||||
|
||||
#endif
|
||||
328
be/src/glibc-compatibility/musl/log_data.c
Normal file
328
be/src/glibc-compatibility/musl/log_data.c
Normal file
@ -0,0 +1,328 @@
|
||||
/*
|
||||
* Data for log.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "log_data.h"
|
||||
|
||||
#define N (1 << LOG_TABLE_BITS)
|
||||
|
||||
const struct log_data __log_data = {
|
||||
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||
.ln2lo = 0x1.ef35793c76730p-45,
|
||||
.poly1 = {
|
||||
// relative error: 0x1.c04d76cp-63
|
||||
// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
|
||||
-0x1p-1,
|
||||
0x1.5555555555577p-2,
|
||||
-0x1.ffffffffffdcbp-3,
|
||||
0x1.999999995dd0cp-3,
|
||||
-0x1.55555556745a7p-3,
|
||||
0x1.24924a344de3p-3,
|
||||
-0x1.fffffa4423d65p-4,
|
||||
0x1.c7184282ad6cap-4,
|
||||
-0x1.999eb43b068ffp-4,
|
||||
0x1.78182f7afd085p-4,
|
||||
-0x1.5521375d145cdp-4,
|
||||
},
|
||||
.poly = {
|
||||
// relative error: 0x1.926199e8p-56
|
||||
// abs error: 0x1.882ff33p-65
|
||||
// in -0x1.fp-9 0x1.fp-9
|
||||
-0x1.0000000000001p-1,
|
||||
0x1.555555551305bp-2,
|
||||
-0x1.fffffffeb459p-3,
|
||||
0x1.999b324f10111p-3,
|
||||
-0x1.55575e506c89fp-3,
|
||||
},
|
||||
/* Algorithm:
|
||||
|
||||
x = 2^k z
|
||||
log(x) = k ln2 + log(c) + log(z/c)
|
||||
log(z/c) = poly(z/c - 1)
|
||||
|
||||
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
|
||||
into the ith one, then table entries are computed as
|
||||
|
||||
tab[i].invc = 1/c
|
||||
tab[i].logc = (double)log(c)
|
||||
tab2[i].chi = (double)c
|
||||
tab2[i].clo = (double)(c - (double)c)
|
||||
|
||||
where c is near the center of the subinterval and is chosen by trying +-2^29
|
||||
floating point invc candidates around 1/center and selecting one for which
|
||||
|
||||
1) the rounding error in 0x1.8p9 + logc is 0,
|
||||
2) the rounding error in z - chi - clo is < 0x1p-66 and
|
||||
3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
|
||||
|
||||
Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
|
||||
2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
|
||||
a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
|
||||
that logc + poly(z/c - 1) has small error, however near x == 1 when
|
||||
|log(x)| < 0x1p-4, this is not enough so that is special cased. */
|
||||
.tab = {
|
||||
{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
|
||||
{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
|
||||
{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
|
||||
{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
|
||||
{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
|
||||
{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
|
||||
{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
|
||||
{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
|
||||
{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
|
||||
{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
|
||||
{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
|
||||
{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
|
||||
{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
|
||||
{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
|
||||
{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
|
||||
{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
|
||||
{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
|
||||
{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
|
||||
{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
|
||||
{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
|
||||
{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
|
||||
{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
|
||||
{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
|
||||
{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
|
||||
{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
|
||||
{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
|
||||
{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
|
||||
{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
|
||||
{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
|
||||
{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
|
||||
{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
|
||||
{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
|
||||
{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
|
||||
{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
|
||||
{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
|
||||
{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
|
||||
{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
|
||||
{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
|
||||
{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
|
||||
{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
|
||||
{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
|
||||
{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
|
||||
{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
|
||||
{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
|
||||
{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
|
||||
{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
|
||||
{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
|
||||
{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
|
||||
{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
|
||||
{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
|
||||
{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
|
||||
{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
|
||||
{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
|
||||
{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
|
||||
{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
|
||||
{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
|
||||
{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
|
||||
{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
|
||||
{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
|
||||
{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
|
||||
{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
|
||||
{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
|
||||
{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
|
||||
{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
|
||||
{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
|
||||
{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
|
||||
{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
|
||||
{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
|
||||
{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
|
||||
{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
|
||||
{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
|
||||
{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
|
||||
{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
|
||||
{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
|
||||
{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
|
||||
{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
|
||||
{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
|
||||
{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
|
||||
{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
|
||||
{0x1.008040614b195p+0, -0x1.0040979240000p-9},
|
||||
{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
|
||||
{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
|
||||
{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
|
||||
{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
|
||||
{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
|
||||
{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
|
||||
{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
|
||||
{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
|
||||
{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
|
||||
{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
|
||||
{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
|
||||
{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
|
||||
{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
|
||||
{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
|
||||
{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
|
||||
{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
|
||||
{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
|
||||
{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
|
||||
{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
|
||||
{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
|
||||
{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
|
||||
{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
|
||||
{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
|
||||
{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
|
||||
{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
|
||||
{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
|
||||
{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
|
||||
{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
|
||||
{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
|
||||
{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
|
||||
{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
|
||||
{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
|
||||
{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
|
||||
{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
|
||||
{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
|
||||
{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
|
||||
{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
|
||||
{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
|
||||
{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
|
||||
{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
|
||||
{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
|
||||
{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
|
||||
{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
|
||||
{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
|
||||
{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
|
||||
{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
|
||||
{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
|
||||
{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
|
||||
},
|
||||
#if !__FP_FAST_FMA
|
||||
.tab2 = {
|
||||
{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
|
||||
{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
|
||||
{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
|
||||
{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
|
||||
{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
|
||||
{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
|
||||
{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
|
||||
{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
|
||||
{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
|
||||
{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
|
||||
{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
|
||||
{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
|
||||
{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
|
||||
{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
|
||||
{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
|
||||
{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
|
||||
{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
|
||||
{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
|
||||
{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
|
||||
{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
|
||||
{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
|
||||
{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
|
||||
{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
|
||||
{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
|
||||
{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
|
||||
{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
|
||||
{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
|
||||
{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
|
||||
{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
|
||||
{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
|
||||
{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
|
||||
{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
|
||||
{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
|
||||
{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
|
||||
{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
|
||||
{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
|
||||
{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
|
||||
{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
|
||||
{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
|
||||
{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
|
||||
{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
|
||||
{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
|
||||
{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
|
||||
{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
|
||||
{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
|
||||
{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
|
||||
{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
|
||||
{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
|
||||
{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
|
||||
{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
|
||||
{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
|
||||
{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
|
||||
{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
|
||||
{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
|
||||
{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
|
||||
{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
|
||||
{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
|
||||
{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
|
||||
{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
|
||||
{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
|
||||
{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
|
||||
{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
|
||||
{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
|
||||
{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
|
||||
{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
|
||||
{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
|
||||
{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
|
||||
{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
|
||||
{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
|
||||
{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
|
||||
{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
|
||||
{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
|
||||
{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
|
||||
{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
|
||||
{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
|
||||
{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
|
||||
{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
|
||||
{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
|
||||
{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
|
||||
{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
|
||||
{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
|
||||
{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
|
||||
{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
|
||||
{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
|
||||
{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
|
||||
{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
|
||||
{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
|
||||
{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
|
||||
{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
|
||||
{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
|
||||
{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
|
||||
{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
|
||||
{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
|
||||
{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
|
||||
{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
|
||||
{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
|
||||
{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
|
||||
{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
|
||||
{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
|
||||
{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
|
||||
{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
|
||||
{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
|
||||
{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
|
||||
{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
|
||||
{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
|
||||
{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
|
||||
{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
|
||||
{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
|
||||
{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
|
||||
{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
|
||||
{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
|
||||
{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
|
||||
{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
|
||||
{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
|
||||
{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
|
||||
{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
|
||||
{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
|
||||
{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
|
||||
{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
|
||||
{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
|
||||
{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
|
||||
{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
|
||||
{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
|
||||
{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
|
||||
{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
|
||||
{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
|
||||
{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
|
||||
{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
|
||||
},
|
||||
#endif
|
||||
};
|
||||
28
be/src/glibc-compatibility/musl/log_data.h
Normal file
28
be/src/glibc-compatibility/musl/log_data.h
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _LOG_DATA_H
|
||||
#define _LOG_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
|
||||
#define LOG_TABLE_BITS 7
|
||||
#define LOG_POLY_ORDER 6
|
||||
#define LOG_POLY1_ORDER 12
|
||||
extern hidden const struct log_data {
|
||||
double ln2hi;
|
||||
double ln2lo;
|
||||
double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||
double poly1[LOG_POLY1_ORDER - 1];
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << LOG_TABLE_BITS];
|
||||
#if !__FP_FAST_FMA
|
||||
struct {
|
||||
double chi, clo;
|
||||
} tab2[1 << LOG_TABLE_BITS];
|
||||
#endif
|
||||
} __log_data;
|
||||
|
||||
#endif
|
||||
71
be/src/glibc-compatibility/musl/logf.c
Normal file
71
be/src/glibc-compatibility/musl/logf.c
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Single-precision log function.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "logf_data.h"
|
||||
|
||||
/*
|
||||
LOGF_TABLE_BITS = 4
|
||||
LOGF_POLY_ORDER = 4
|
||||
|
||||
ULP error: 0.818 (nearest rounding.)
|
||||
Relative error: 1.957 * 2^-26 (before rounding.)
|
||||
*/
|
||||
|
||||
#define T __logf_data.tab
|
||||
#define A __logf_data.poly
|
||||
#define Ln2 __logf_data.ln2
|
||||
#define N (1 << LOGF_TABLE_BITS)
|
||||
#define OFF 0x3f330000
|
||||
|
||||
float logf(float x)
|
||||
{
|
||||
double_t z, r, r2, y, y0, invc, logc;
|
||||
uint32_t ix, iz, tmp;
|
||||
int k, i;
|
||||
|
||||
ix = asuint(x);
|
||||
/* Fix sign of zero with downward rounding when x==1. */
|
||||
if (WANT_ROUNDING && predict_false(ix == 0x3f800000))
|
||||
return 0;
|
||||
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
||||
/* x < 0x1p-126 or inf or nan. */
|
||||
if (ix * 2 == 0)
|
||||
return __math_divzerof(1);
|
||||
if (ix == 0x7f800000) /* log(inf) == inf. */
|
||||
return x;
|
||||
if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
|
||||
return __math_invalidf(x);
|
||||
/* x is subnormal, normalize it. */
|
||||
ix = asuint(x * 0x1p23f);
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
|
||||
k = (int32_t)tmp >> 23; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0x1ff << 23);
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t)asfloat(iz);
|
||||
|
||||
/* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t)k * Ln2;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r). */
|
||||
r2 = r * r;
|
||||
y = A[1] * r + A[2];
|
||||
y = A[0] * r2 + y;
|
||||
y = y * r2 + (y0 + r);
|
||||
return eval_as_float(y);
|
||||
}
|
||||
33
be/src/glibc-compatibility/musl/logf_data.c
Normal file
33
be/src/glibc-compatibility/musl/logf_data.c
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Data definition for logf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "logf_data.h"
|
||||
|
||||
const struct logf_data __logf_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
|
||||
{ 0x1p+0, 0x0p+0 },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
|
||||
{ 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
|
||||
},
|
||||
.ln2 = 0x1.62e42fefa39efp-1,
|
||||
.poly = {
|
||||
-0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
|
||||
}
|
||||
};
|
||||
20
be/src/glibc-compatibility/musl/logf_data.h
Normal file
20
be/src/glibc-compatibility/musl/logf_data.h
Normal file
@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _LOGF_DATA_H
|
||||
#define _LOGF_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
|
||||
#define LOGF_TABLE_BITS 4
|
||||
#define LOGF_POLY_ORDER 4
|
||||
extern hidden const struct logf_data {
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << LOGF_TABLE_BITS];
|
||||
double ln2;
|
||||
double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
|
||||
} __logf_data;
|
||||
|
||||
#endif
|
||||
5
be/src/glibc-compatibility/musl/musl_features.h
Normal file
5
be/src/glibc-compatibility/musl/musl_features.h
Normal file
@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#define weak __attribute__((__weak__))
|
||||
#define hidden __attribute__((__visibility__("hidden")))
|
||||
#define predict_false(x) __builtin_expect(x, 0)
|
||||
23
be/src/glibc-compatibility/musl/pipe2.c
Normal file
23
be/src/glibc-compatibility/musl/pipe2.c
Normal file
@ -0,0 +1,23 @@
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <syscall.h>
|
||||
#include "syscall.h"
|
||||
|
||||
int pipe2(int fd[2], int flag)
|
||||
{
|
||||
if (!flag) return pipe(fd);
|
||||
int ret = __syscall(SYS_pipe2, fd, flag);
|
||||
if (ret != -ENOSYS) return __syscall_ret(ret);
|
||||
ret = pipe(fd);
|
||||
if (ret) return ret;
|
||||
if (flag & O_CLOEXEC) {
|
||||
__syscall(SYS_fcntl, fd[0], F_SETFD, FD_CLOEXEC);
|
||||
__syscall(SYS_fcntl, fd[1], F_SETFD, FD_CLOEXEC);
|
||||
}
|
||||
if (flag & O_NONBLOCK) {
|
||||
__syscall(SYS_fcntl, fd[0], F_SETFL, O_NONBLOCK);
|
||||
__syscall(SYS_fcntl, fd[1], F_SETFL, O_NONBLOCK);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
107
be/src/glibc-compatibility/musl/posix_spawn.c
Normal file
107
be/src/glibc-compatibility/musl/posix_spawn.c
Normal file
@ -0,0 +1,107 @@
|
||||
/// Very limited implementation. Half of code from Musl was cut.
|
||||
/// This is Ok, because for now, this function is used only from clang driver.
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <spawn.h>
|
||||
#include <sched.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/wait.h>
|
||||
#include <syscall.h>
|
||||
#include <sys/signal.h>
|
||||
#include <pthread.h>
|
||||
#include <spawn.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
struct args {
|
||||
int p[2];
|
||||
sigset_t oldmask;
|
||||
const char *path;
|
||||
int (*exec)(const char *, char *const *, char *const *);
|
||||
const posix_spawn_file_actions_t *fa;
|
||||
const posix_spawnattr_t *restrict attr;
|
||||
char *const *argv, *const *envp;
|
||||
};
|
||||
|
||||
void __get_handler_set(sigset_t *);
|
||||
|
||||
static int child(void *args_vp)
|
||||
{
|
||||
int ret;
|
||||
struct args *args = args_vp;
|
||||
int p = args->p[1];
|
||||
const posix_spawnattr_t *restrict attr = args->attr;
|
||||
|
||||
close(args->p[0]);
|
||||
|
||||
/* Close-on-exec flag may have been lost if we moved the pipe
|
||||
* to a different fd. We don't use F_DUPFD_CLOEXEC above because
|
||||
* it would fail on older kernels and atomicity is not needed --
|
||||
* in this process there are no threads or signal handlers. */
|
||||
__syscall(SYS_fcntl, p, F_SETFD, FD_CLOEXEC);
|
||||
|
||||
pthread_sigmask(SIG_SETMASK, (attr->__flags & POSIX_SPAWN_SETSIGMASK)
|
||||
? &attr->__ss : &args->oldmask, 0);
|
||||
|
||||
args->exec(args->path, args->argv, args->envp);
|
||||
ret = -errno;
|
||||
|
||||
/* Since sizeof errno < PIPE_BUF, the write is atomic. */
|
||||
ret = -ret;
|
||||
if (ret) while (__syscall(SYS_write, p, &ret, sizeof ret) < 0);
|
||||
_exit(127);
|
||||
}
|
||||
|
||||
|
||||
int __posix_spawnx(pid_t *restrict res, const char *restrict path,
|
||||
int (*exec)(const char *, char *const *, char *const *),
|
||||
const posix_spawn_file_actions_t *fa,
|
||||
const posix_spawnattr_t *restrict attr,
|
||||
char *const argv[restrict], char *const envp[restrict])
|
||||
{
|
||||
pid_t pid;
|
||||
char stack[1024];
|
||||
int ec=0, cs;
|
||||
struct args args;
|
||||
|
||||
if (pipe2(args.p, O_CLOEXEC))
|
||||
return errno;
|
||||
|
||||
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
|
||||
|
||||
args.path = path;
|
||||
args.exec = exec;
|
||||
args.fa = fa;
|
||||
args.attr = attr ? attr : &(const posix_spawnattr_t){0};
|
||||
args.argv = argv;
|
||||
args.envp = envp;
|
||||
|
||||
pid = clone(child, stack+sizeof stack,
|
||||
CLONE_VM|CLONE_VFORK|SIGCHLD, &args);
|
||||
close(args.p[1]);
|
||||
|
||||
if (pid > 0) {
|
||||
if (read(args.p[0], &ec, sizeof ec) != sizeof ec) ec = 0;
|
||||
else waitpid(pid, &(int){0}, 0);
|
||||
} else {
|
||||
ec = -pid;
|
||||
}
|
||||
|
||||
close(args.p[0]);
|
||||
|
||||
if (!ec && res) *res = pid;
|
||||
|
||||
pthread_setcancelstate(cs, 0);
|
||||
|
||||
return ec;
|
||||
}
|
||||
|
||||
int posix_spawn(pid_t *restrict res, const char *restrict path,
|
||||
const posix_spawn_file_actions_t *fa,
|
||||
const posix_spawnattr_t *restrict attr,
|
||||
char *const argv[restrict], char *const envp[restrict])
|
||||
{
|
||||
return __posix_spawnx(res, path, execve, fa, attr, argv, envp);
|
||||
}
|
||||
343
be/src/glibc-compatibility/musl/pow.c
Normal file
343
be/src/glibc-compatibility/musl/pow.c
Normal file
@ -0,0 +1,343 @@
|
||||
/*
|
||||
* Double-precision x^y function.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp_data.h"
|
||||
#include "pow_data.h"
|
||||
|
||||
/*
|
||||
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
|
||||
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
|
||||
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
|
||||
*/
|
||||
|
||||
#define T __pow_log_data.tab
|
||||
#define A __pow_log_data.poly
|
||||
#define Ln2hi __pow_log_data.ln2hi
|
||||
#define Ln2lo __pow_log_data.ln2lo
|
||||
#define N (1 << POW_LOG_TABLE_BITS)
|
||||
#define OFF 0x3fe6955500000000
|
||||
|
||||
/* Top 12 bits of a double (sign and exponent bits). */
|
||||
static inline uint32_t top12(double x)
|
||||
{
|
||||
return asuint64(x) >> 52;
|
||||
}
|
||||
|
||||
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
|
||||
additional 15 bits precision. IX is the bit representation of x, but
|
||||
normalized in the subnormal range using the sign bit for the exponent. */
|
||||
static inline double_t log_inline(uint64_t ix, double_t *tail)
|
||||
{
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
|
||||
uint64_t iz, tmp;
|
||||
int k, i;
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
|
||||
k = (int64_t)tmp >> 52; /* arithmetic shift */
|
||||
iz = ix - (tmp & 0xfffULL << 52);
|
||||
z = asdouble(iz);
|
||||
kd = (double_t)k;
|
||||
|
||||
/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
logctail = T[i].logctail;
|
||||
|
||||
/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|
||||
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representible. */
|
||||
#if __FP_FAST_FMA
|
||||
r = __builtin_fma(z, invc, -1.0);
|
||||
#else
|
||||
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
|
||||
double_t zhi = asdouble((iz + (1ULL << 31)) & (-1ULL << 32));
|
||||
double_t zlo = z - zhi;
|
||||
double_t rhi = zhi * invc - 1.0;
|
||||
double_t rlo = zlo * invc;
|
||||
r = rhi + rlo;
|
||||
#endif
|
||||
|
||||
/* k*Ln2 + log(c) + r. */
|
||||
t1 = kd * Ln2hi + logc;
|
||||
t2 = t1 + r;
|
||||
lo1 = kd * Ln2lo + logctail;
|
||||
lo2 = t1 - t2 + r;
|
||||
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
double_t ar, ar2, ar3, lo3, lo4;
|
||||
ar = A[0] * r; /* A[0] = -0.5. */
|
||||
ar2 = r * ar;
|
||||
ar3 = r * ar2;
|
||||
/* k*Ln2 + log(c) + r + A[0]*r*r. */
|
||||
#if __FP_FAST_FMA
|
||||
hi = t2 + ar2;
|
||||
lo3 = __builtin_fma(ar, r, -ar2);
|
||||
lo4 = t2 - hi + ar2;
|
||||
#else
|
||||
double_t arhi = A[0] * rhi;
|
||||
double_t arhi2 = rhi * arhi;
|
||||
hi = t2 + arhi2;
|
||||
lo3 = rlo * (ar + arhi);
|
||||
lo4 = t2 - hi + arhi2;
|
||||
#endif
|
||||
/* p = log1p(r) - r - A[0]*r*r. */
|
||||
p = (ar3 * (A[1] + r * A[2] +
|
||||
ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
|
||||
lo = lo1 + lo2 + lo3 + lo4 + p;
|
||||
y = hi + lo;
|
||||
*tail = hi - y + lo;
|
||||
return y;
|
||||
}
|
||||
|
||||
#undef N
|
||||
#undef T
|
||||
#define N (1 << EXP_TABLE_BITS)
|
||||
#define InvLn2N __exp_data.invln2N
|
||||
#define NegLn2hiN __exp_data.negln2hiN
|
||||
#define NegLn2loN __exp_data.negln2loN
|
||||
#define Shift __exp_data.shift
|
||||
#define T __exp_data.tab
|
||||
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
|
||||
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
|
||||
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
|
||||
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
|
||||
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
|
||||
|
||||
/* Handle cases that may overflow or underflow when computing the result that
|
||||
is scale*(1+TMP) without intermediate rounding. The bit representation of
|
||||
scale is in SBITS, however it has a computed exponent that may have
|
||||
overflown into the sign bit so that needs to be adjusted before using it as
|
||||
a double. (int32_t)KI is the k used in the argument reduction and exponent
|
||||
adjustment of scale, positive k here means the result may overflow and
|
||||
negative k means the result may underflow. */
|
||||
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)
|
||||
{
|
||||
double_t scale, y;
|
||||
|
||||
if ((ki & 0x80000000) == 0) {
|
||||
/* k > 0, the exponent of scale might have overflowed by <= 460. */
|
||||
sbits -= 1009ull << 52;
|
||||
scale = asdouble(sbits);
|
||||
y = 0x1p1009 * (scale + scale * tmp);
|
||||
return eval_as_double(y);
|
||||
}
|
||||
/* k < 0, need special care in the subnormal range. */
|
||||
sbits += 1022ull << 52;
|
||||
/* Note: sbits is signed scale. */
|
||||
scale = asdouble(sbits);
|
||||
y = scale + scale * tmp;
|
||||
if (fabs(y) < 1.0) {
|
||||
/* Round y to the right precision before scaling it into the subnormal
|
||||
range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
||||
E is the worst-case ulp error outside the subnormal range. So this
|
||||
is only useful if the goal is better than 1 ulp worst-case error. */
|
||||
double_t hi, lo, one = 1.0;
|
||||
if (y < 0.0)
|
||||
one = -1.0;
|
||||
lo = scale - y + scale * tmp;
|
||||
hi = one + y;
|
||||
lo = one - hi + y + lo;
|
||||
y = eval_as_double(hi + lo) - one;
|
||||
/* Fix the sign of 0. */
|
||||
if (y == 0.0)
|
||||
y = asdouble(sbits & 0x8000000000000000);
|
||||
/* The underflow exception needs to be signaled explicitly. */
|
||||
fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
|
||||
}
|
||||
y = 0x1p-1022 * y;
|
||||
return eval_as_double(y);
|
||||
}
|
||||
|
||||
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)
|
||||
|
||||
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
|
||||
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. */
|
||||
static inline double exp_inline(double_t x, double_t xtail, uint32_t sign_bias)
|
||||
{
|
||||
uint32_t abstop;
|
||||
uint64_t ki, idx, top, sbits;
|
||||
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
|
||||
double_t kd, z, r, r2, scale, tail, tmp;
|
||||
|
||||
abstop = top12(x) & 0x7ff;
|
||||
if (predict_false(abstop - top12(0x1p-54) >=
|
||||
top12(512.0) - top12(0x1p-54))) {
|
||||
if (abstop - top12(0x1p-54) >= 0x80000000) {
|
||||
/* Avoid spurious underflow for tiny x. */
|
||||
/* Note: 0 is common input. */
|
||||
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
|
||||
return sign_bias ? -one : one;
|
||||
}
|
||||
if (abstop >= top12(1024.0)) {
|
||||
/* Note: inf and nan are already handled. */
|
||||
if (asuint64(x) >> 63)
|
||||
return __math_uflow(sign_bias);
|
||||
else
|
||||
return __math_oflow(sign_bias);
|
||||
}
|
||||
/* Large x is special cased below. */
|
||||
abstop = 0;
|
||||
}
|
||||
|
||||
/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
|
||||
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
|
||||
z = InvLn2N * x;
|
||||
#if TOINT_INTRINSICS
|
||||
kd = roundtoint(z);
|
||||
ki = converttoint(z);
|
||||
#elif EXP_USE_TOINT_NARROW
|
||||
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
|
||||
kd = eval_as_double(z + Shift);
|
||||
ki = asuint64(kd) >> 16;
|
||||
kd = (double_t)(int32_t)ki;
|
||||
#else
|
||||
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
|
||||
kd = eval_as_double(z + Shift);
|
||||
ki = asuint64(kd);
|
||||
kd -= Shift;
|
||||
#endif
|
||||
r = x + kd * NegLn2hiN + kd * NegLn2loN;
|
||||
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
|
||||
r += xtail;
|
||||
/* 2^(k/N) ~= scale * (1 + tail). */
|
||||
idx = 2 * (ki % N);
|
||||
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
|
||||
tail = asdouble(T[idx]);
|
||||
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
||||
sbits = T[idx + 1] + top;
|
||||
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
|
||||
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
||||
r2 = r * r;
|
||||
/* Without fma the worst case error is 0.25/N ulp larger. */
|
||||
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
|
||||
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
||||
if (predict_false(abstop == 0))
|
||||
return specialcase(tmp, sbits, ki);
|
||||
scale = asdouble(sbits);
|
||||
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
|
||||
is no spurious underflow here even without fma. */
|
||||
return eval_as_double(scale + scale * tmp);
|
||||
}
|
||||
|
||||
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
||||
the bit representation of a non-zero finite floating-point value. */
|
||||
static inline int checkint(uint64_t iy)
|
||||
{
|
||||
int e = iy >> 52 & 0x7ff;
|
||||
if (e < 0x3ff)
|
||||
return 0;
|
||||
if (e > 0x3ff + 52)
|
||||
return 2;
|
||||
if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
|
||||
return 0;
|
||||
if (iy & (1ULL << (0x3ff + 52 - e)))
|
||||
return 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
|
||||
static inline int zeroinfnan(uint64_t i)
|
||||
{
|
||||
return 2 * i - 1 >= 2 * asuint64(INFINITY) - 1;
|
||||
}
|
||||
|
||||
double pow(double x, double y)
|
||||
{
|
||||
uint32_t sign_bias = 0;
|
||||
uint64_t ix, iy;
|
||||
uint32_t topx, topy;
|
||||
|
||||
ix = asuint64(x);
|
||||
iy = asuint64(y);
|
||||
topx = top12(x);
|
||||
topy = top12(y);
|
||||
if (predict_false(topx - 0x001 >= 0x7ff - 0x001 ||
|
||||
(topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) {
|
||||
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
|
||||
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
|
||||
/* Special cases: (x < 0x1p-126 or inf or nan) or
|
||||
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
|
||||
if (predict_false(zeroinfnan(iy))) {
|
||||
if (2 * iy == 0)
|
||||
return issignaling_inline(x) ? x + y : 1.0;
|
||||
if (ix == asuint64(1.0))
|
||||
return issignaling_inline(y) ? x + y : 1.0;
|
||||
if (2 * ix > 2 * asuint64(INFINITY) ||
|
||||
2 * iy > 2 * asuint64(INFINITY))
|
||||
return x + y;
|
||||
if (2 * ix == 2 * asuint64(1.0))
|
||||
return 1.0;
|
||||
if ((2 * ix < 2 * asuint64(1.0)) == !(iy >> 63))
|
||||
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||
return y * y;
|
||||
}
|
||||
if (predict_false(zeroinfnan(ix))) {
|
||||
double_t x2 = x * x;
|
||||
if (ix >> 63 && checkint(iy) == 1)
|
||||
x2 = -x2;
|
||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||
thus division by zero exception can be signaled spuriously. */
|
||||
return iy >> 63 ? fp_barrier(1 / x2) : x2;
|
||||
}
|
||||
/* Here x and y are non-zero finite. */
|
||||
if (ix >> 63) {
|
||||
/* Finite x < 0. */
|
||||
int yint = checkint(iy);
|
||||
if (yint == 0)
|
||||
return __math_invalid(x);
|
||||
if (yint == 1)
|
||||
sign_bias = SIGN_BIAS;
|
||||
ix &= 0x7fffffffffffffff;
|
||||
topx &= 0x7ff;
|
||||
}
|
||||
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) {
|
||||
/* Note: sign_bias == 0 here because y is not odd. */
|
||||
if (ix == asuint64(1.0))
|
||||
return 1.0;
|
||||
if ((topy & 0x7ff) < 0x3be) {
|
||||
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
|
||||
if (WANT_ROUNDING)
|
||||
return ix > asuint64(1.0) ? 1.0 + y :
|
||||
1.0 - y;
|
||||
else
|
||||
return 1.0;
|
||||
}
|
||||
return (ix > asuint64(1.0)) == (topy < 0x800) ?
|
||||
__math_oflow(0) :
|
||||
__math_uflow(0);
|
||||
}
|
||||
if (topx == 0) {
|
||||
/* Normalize subnormal x so exponent becomes negative. */
|
||||
ix = asuint64(x * 0x1p52);
|
||||
ix &= 0x7fffffffffffffff;
|
||||
ix -= 52ULL << 52;
|
||||
}
|
||||
}
|
||||
|
||||
double_t lo;
|
||||
double_t hi = log_inline(ix, &lo);
|
||||
double_t ehi, elo;
|
||||
#if __FP_FAST_FMA
|
||||
ehi = y * hi;
|
||||
elo = y * lo + __builtin_fma(y, hi, -ehi);
|
||||
#else
|
||||
double_t yhi = asdouble(iy & -1ULL << 27);
|
||||
double_t ylo = y - yhi;
|
||||
double_t lhi = asdouble(asuint64(hi) & -1ULL << 27);
|
||||
double_t llo = hi - lhi + lo;
|
||||
ehi = yhi * lhi;
|
||||
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
|
||||
#endif
|
||||
return exp_inline(ehi, elo, sign_bias);
|
||||
}
|
||||
180
be/src/glibc-compatibility/musl/pow_data.c
Normal file
180
be/src/glibc-compatibility/musl/pow_data.c
Normal file
@ -0,0 +1,180 @@
|
||||
/*
|
||||
* Data for the log part of pow.
|
||||
*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "pow_data.h"
|
||||
|
||||
#define N (1 << POW_LOG_TABLE_BITS)
|
||||
|
||||
const struct pow_log_data __pow_log_data = {
|
||||
.ln2hi = 0x1.62e42fefa3800p-1,
|
||||
.ln2lo = 0x1.ef35793c76730p-45,
|
||||
.poly = {
|
||||
// relative error: 0x1.11922ap-70
|
||||
// in -0x1.6bp-8 0x1.6bp-8
|
||||
// Coefficients are scaled to match the scaling during evaluation.
|
||||
-0x1p-1,
|
||||
0x1.555555555556p-2 * -2,
|
||||
-0x1.0000000000006p-2 * -2,
|
||||
0x1.999999959554ep-3 * 4,
|
||||
-0x1.555555529a47ap-3 * 4,
|
||||
0x1.2495b9b4845e9p-3 * -8,
|
||||
-0x1.0002b8b263fc3p-3 * -8,
|
||||
},
|
||||
/* Algorithm:
|
||||
|
||||
x = 2^k z
|
||||
log(x) = k ln2 + log(c) + log(z/c)
|
||||
log(z/c) = poly(z/c - 1)
|
||||
|
||||
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
|
||||
and z falls into the ith one, then table entries are computed as
|
||||
|
||||
tab[i].invc = 1/c
|
||||
tab[i].logc = round(0x1p43*log(c))/0x1p43
|
||||
tab[i].logctail = (double)(log(c) - logc)
|
||||
|
||||
where c is chosen near the center of the subinterval such that 1/c has only a
|
||||
few precision bits so z/c - 1 is exactly representible as double:
|
||||
|
||||
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
|
||||
|
||||
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
|
||||
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
|
||||
error and the interval for z is selected such that near x == 1, where log(x)
|
||||
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
|
||||
.tab = {
|
||||
#define A(a, b, c) {a, 0, b, c},
|
||||
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
|
||||
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
|
||||
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
|
||||
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
|
||||
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
|
||||
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
|
||||
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
|
||||
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
|
||||
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
|
||||
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
|
||||
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
|
||||
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
|
||||
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
|
||||
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
|
||||
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
|
||||
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
|
||||
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
|
||||
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
|
||||
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
|
||||
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
|
||||
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
|
||||
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
|
||||
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
|
||||
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
|
||||
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
|
||||
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
|
||||
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
|
||||
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
|
||||
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
|
||||
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
|
||||
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
|
||||
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
|
||||
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
|
||||
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
|
||||
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
|
||||
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
|
||||
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
|
||||
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
|
||||
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
|
||||
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
|
||||
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
|
||||
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
|
||||
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
|
||||
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
|
||||
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
|
||||
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
|
||||
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
|
||||
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
|
||||
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
|
||||
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
|
||||
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
|
||||
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
|
||||
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
|
||||
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
|
||||
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
|
||||
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
|
||||
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
|
||||
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
|
||||
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
|
||||
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
|
||||
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
|
||||
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
|
||||
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
|
||||
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
|
||||
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
|
||||
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
|
||||
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
|
||||
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
|
||||
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
|
||||
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
|
||||
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
|
||||
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
|
||||
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
|
||||
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
|
||||
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
|
||||
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
|
||||
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
|
||||
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
|
||||
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
|
||||
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
|
||||
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
|
||||
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
|
||||
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
|
||||
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
|
||||
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
|
||||
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
|
||||
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
|
||||
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
|
||||
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
|
||||
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
|
||||
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
|
||||
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
|
||||
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
|
||||
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
|
||||
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
|
||||
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
|
||||
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
|
||||
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
|
||||
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
|
||||
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
|
||||
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
|
||||
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
|
||||
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
|
||||
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
|
||||
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
|
||||
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
|
||||
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
|
||||
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
|
||||
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
|
||||
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
|
||||
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
|
||||
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
|
||||
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
|
||||
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
|
||||
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
|
||||
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
|
||||
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
|
||||
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
|
||||
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
|
||||
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
|
||||
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
|
||||
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
|
||||
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
|
||||
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
|
||||
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
|
||||
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
|
||||
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
|
||||
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
|
||||
},
|
||||
};
|
||||
22
be/src/glibc-compatibility/musl/pow_data.h
Normal file
22
be/src/glibc-compatibility/musl/pow_data.h
Normal file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _POW_DATA_H
|
||||
#define _POW_DATA_H
|
||||
|
||||
#include "musl_features.h"
|
||||
|
||||
#define POW_LOG_TABLE_BITS 7
|
||||
#define POW_LOG_POLY_ORDER 8
|
||||
extern hidden const struct pow_log_data {
|
||||
double ln2hi;
|
||||
double ln2lo;
|
||||
double poly[POW_LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
|
||||
/* Note: the pad field is unused, but allows slightly faster indexing. */
|
||||
struct {
|
||||
double invc, pad, logc, logctail;
|
||||
} tab[1 << POW_LOG_TABLE_BITS];
|
||||
} __pow_log_data;
|
||||
|
||||
#endif
|
||||
185
be/src/glibc-compatibility/musl/powf.c
Normal file
185
be/src/glibc-compatibility/musl/powf.c
Normal file
@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "libm.h"
|
||||
#include "exp2f_data.h"
|
||||
#include "powf_data.h"
|
||||
|
||||
/*
|
||||
POWF_LOG2_POLY_ORDER = 5
|
||||
EXP2F_TABLE_BITS = 5
|
||||
|
||||
ULP error: 0.82 (~ 0.5 + relerr*2^24)
|
||||
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
|
||||
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
|
||||
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
|
||||
*/
|
||||
|
||||
#define N (1 << POWF_LOG2_TABLE_BITS)
|
||||
#define T __powf_log2_data.tab
|
||||
#define A __powf_log2_data.poly
|
||||
#define OFF 0x3f330000
|
||||
|
||||
/* Subnormal input is normalized so ix has negative biased exponent.
|
||||
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
|
||||
static inline double_t log2_inline(uint32_t ix)
|
||||
{
|
||||
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
|
||||
uint32_t iz, top, tmp;
|
||||
int k, i;
|
||||
|
||||
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
|
||||
The range is split into N subintervals.
|
||||
The ith subinterval contains z and c is near its center. */
|
||||
tmp = ix - OFF;
|
||||
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
|
||||
top = tmp & 0xff800000;
|
||||
iz = ix - top;
|
||||
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
|
||||
invc = T[i].invc;
|
||||
logc = T[i].logc;
|
||||
z = (double_t)asfloat(iz);
|
||||
|
||||
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
|
||||
r = z * invc - 1;
|
||||
y0 = logc + (double_t)k;
|
||||
|
||||
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
|
||||
r2 = r * r;
|
||||
y = A[0] * r + A[1];
|
||||
p = A[2] * r + A[3];
|
||||
r4 = r2 * r2;
|
||||
q = A[4] * r + y0;
|
||||
q = p * r2 + q;
|
||||
y = y * r4 + q;
|
||||
return y;
|
||||
}
|
||||
|
||||
#undef N
|
||||
#undef T
|
||||
#define N (1 << EXP2F_TABLE_BITS)
|
||||
#define T __exp2f_data.tab
|
||||
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
|
||||
|
||||
/* The output of log2 and thus the input of exp2 is either scaled by N
|
||||
(in case of fast toint intrinsics) or not. The unscaled xd must be
|
||||
in [-1021,1023], sign_bias sets the sign of the result. */
|
||||
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
|
||||
{
|
||||
uint64_t ki, ski, t;
|
||||
double_t kd, z, r, r2, y, s;
|
||||
|
||||
#if TOINT_INTRINSICS
|
||||
#define C __exp2f_data.poly_scaled
|
||||
/* N*x = k + r with r in [-1/2, 1/2] */
|
||||
kd = roundtoint(xd); /* k */
|
||||
ki = converttoint(xd);
|
||||
#else
|
||||
#define C __exp2f_data.poly
|
||||
#define SHIFT __exp2f_data.shift_scaled
|
||||
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
|
||||
kd = eval_as_double(xd + SHIFT);
|
||||
ki = asuint64(kd);
|
||||
kd -= SHIFT; /* k/N */
|
||||
#endif
|
||||
r = xd - kd;
|
||||
|
||||
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
|
||||
t = T[ki % N];
|
||||
ski = ki + sign_bias;
|
||||
t += ski << (52 - EXP2F_TABLE_BITS);
|
||||
s = asdouble(t);
|
||||
z = C[0] * r + C[1];
|
||||
r2 = r * r;
|
||||
y = C[2] * r + 1;
|
||||
y = z * r2 + y;
|
||||
y = y * s;
|
||||
return eval_as_float(y);
|
||||
}
|
||||
|
||||
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
|
||||
the bit representation of a non-zero finite floating-point value. */
|
||||
static inline int checkint(uint32_t iy)
|
||||
{
|
||||
int e = iy >> 23 & 0xff;
|
||||
if (e < 0x7f)
|
||||
return 0;
|
||||
if (e > 0x7f + 23)
|
||||
return 2;
|
||||
if (iy & ((1 << (0x7f + 23 - e)) - 1))
|
||||
return 0;
|
||||
if (iy & (1 << (0x7f + 23 - e)))
|
||||
return 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static inline int zeroinfnan(uint32_t ix)
|
||||
{
|
||||
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
|
||||
}
|
||||
|
||||
float powf(float x, float y)
|
||||
{
|
||||
uint32_t sign_bias = 0;
|
||||
uint32_t ix, iy;
|
||||
|
||||
ix = asuint(x);
|
||||
iy = asuint(y);
|
||||
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
|
||||
zeroinfnan(iy))) {
|
||||
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
|
||||
if (predict_false(zeroinfnan(iy))) {
|
||||
if (2 * iy == 0)
|
||||
return issignalingf_inline(x) ? x + y : 1.0f;
|
||||
if (ix == 0x3f800000)
|
||||
return issignalingf_inline(y) ? x + y : 1.0f;
|
||||
if (2 * ix > 2u * 0x7f800000 ||
|
||||
2 * iy > 2u * 0x7f800000)
|
||||
return x + y;
|
||||
if (2 * ix == 2 * 0x3f800000)
|
||||
return 1.0f;
|
||||
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
|
||||
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
|
||||
return y * y;
|
||||
}
|
||||
if (predict_false(zeroinfnan(ix))) {
|
||||
float_t x2 = x * x;
|
||||
if (ix & 0x80000000 && checkint(iy) == 1)
|
||||
x2 = -x2;
|
||||
/* Without the barrier some versions of clang hoist the 1/x2 and
|
||||
thus division by zero exception can be signaled spuriously. */
|
||||
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
|
||||
}
|
||||
/* x and y are non-zero finite. */
|
||||
if (ix & 0x80000000) {
|
||||
/* Finite x < 0. */
|
||||
int yint = checkint(iy);
|
||||
if (yint == 0)
|
||||
return __math_invalidf(x);
|
||||
if (yint == 1)
|
||||
sign_bias = SIGN_BIAS;
|
||||
ix &= 0x7fffffff;
|
||||
}
|
||||
if (ix < 0x00800000) {
|
||||
/* Normalize subnormal x so exponent becomes negative. */
|
||||
ix = asuint(x * 0x1p23f);
|
||||
ix &= 0x7fffffff;
|
||||
ix -= 23 << 23;
|
||||
}
|
||||
}
|
||||
double_t logx = log2_inline(ix);
|
||||
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
|
||||
if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
|
||||
asuint64(126.0 * POWF_SCALE) >> 47)) {
|
||||
/* |y*log(x)| >= 126. */
|
||||
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
|
||||
return __math_oflowf(sign_bias);
|
||||
if (ylogx <= -150.0 * POWF_SCALE)
|
||||
return __math_uflowf(sign_bias);
|
||||
}
|
||||
return exp2_inline(ylogx, sign_bias);
|
||||
}
|
||||
34
be/src/glibc-compatibility/musl/powf_data.c
Normal file
34
be/src/glibc-compatibility/musl/powf_data.c
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Data definition for powf.
|
||||
*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "powf_data.h"
|
||||
|
||||
const struct powf_log2_data __powf_log2_data = {
|
||||
.tab = {
|
||||
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
|
||||
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
|
||||
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
|
||||
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
|
||||
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
|
||||
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
|
||||
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
|
||||
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
|
||||
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
|
||||
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
|
||||
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
|
||||
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
|
||||
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
|
||||
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
|
||||
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
|
||||
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
|
||||
},
|
||||
.poly = {
|
||||
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
|
||||
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
|
||||
0x1.71547652ab82bp0 * POWF_SCALE,
|
||||
}
|
||||
};
|
||||
26
be/src/glibc-compatibility/musl/powf_data.h
Normal file
26
be/src/glibc-compatibility/musl/powf_data.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Arm Limited.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
#ifndef _POWF_DATA_H
|
||||
#define _POWF_DATA_H
|
||||
|
||||
#include "libm.h"
|
||||
#include "exp2f_data.h"
|
||||
|
||||
#define POWF_LOG2_TABLE_BITS 4
|
||||
#define POWF_LOG2_POLY_ORDER 5
|
||||
#if TOINT_INTRINSICS
|
||||
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
|
||||
#else
|
||||
#define POWF_SCALE_BITS 0
|
||||
#endif
|
||||
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
|
||||
extern hidden const struct powf_log2_data {
|
||||
struct {
|
||||
double invc, logc;
|
||||
} tab[1 << POWF_LOG2_TABLE_BITS];
|
||||
double poly[POWF_LOG2_POLY_ORDER];
|
||||
} __powf_log2_data;
|
||||
|
||||
#endif
|
||||
522
be/src/glibc-compatibility/musl/powl.c
Normal file
522
be/src/glibc-compatibility/musl/powl.c
Normal file
@ -0,0 +1,522 @@
|
||||
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
|
||||
/*
|
||||
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
/* powl.c
|
||||
*
|
||||
* Power function, long double precision
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* long double x, y, z, powl();
|
||||
*
|
||||
* z = powl( x, y );
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Computes x raised to the yth power. Analytically,
|
||||
*
|
||||
* x**y = exp( y log(x) ).
|
||||
*
|
||||
* Following Cody and Waite, this program uses a lookup table
|
||||
* of 2**-i/32 and pseudo extended precision arithmetic to
|
||||
* obtain several extra bits of accuracy in both the logarithm
|
||||
* and the exponential.
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* The relative error of pow(x,y) can be estimated
|
||||
* by y dl ln(2), where dl is the absolute error of
|
||||
* the internally computed base 2 logarithm. At the ends
|
||||
* of the approximation interval the logarithm equal 1/32
|
||||
* and its relative error is about 1 lsb = 1.1e-19. Hence
|
||||
* the predicted relative error in the result is 2.3e-21 y .
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
*
|
||||
* IEEE +-1000 40000 2.8e-18 3.7e-19
|
||||
* .001 < x < 1000, with log(x) uniformly distributed.
|
||||
* -1000 < y < 1000, y uniformly distributed.
|
||||
*
|
||||
* IEEE 0,8700 60000 6.5e-18 1.0e-18
|
||||
* 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
|
||||
*
|
||||
*
|
||||
* ERROR MESSAGES:
|
||||
*
|
||||
* message condition value returned
|
||||
* pow overflow x**y > MAXNUM INFINITY
|
||||
* pow underflow x**y < 1/MAXNUM 0.0
|
||||
* pow domain x<0 and y noninteger 0.0
|
||||
*
|
||||
*/
|
||||
|
||||
#include "libm.h"
|
||||
|
||||
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
||||
long double powl(long double x, long double y)
|
||||
{
|
||||
return pow(x, y);
|
||||
}
|
||||
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
|
||||
|
||||
/* Table size */
|
||||
#define NXT 32
|
||||
|
||||
/* log(1+x) = x - .5x^2 + x^3 * P(z)/Q(z)
|
||||
* on the domain 2^(-1/32) - 1 <= x <= 2^(1/32) - 1
|
||||
*/
|
||||
static const long double P[] = {
|
||||
8.3319510773868690346226E-4L,
|
||||
4.9000050881978028599627E-1L,
|
||||
1.7500123722550302671919E0L,
|
||||
1.4000100839971580279335E0L,
|
||||
};
|
||||
static const long double Q[] = {
|
||||
/* 1.0000000000000000000000E0L,*/
|
||||
5.2500282295834889175431E0L,
|
||||
8.4000598057587009834666E0L,
|
||||
4.2000302519914740834728E0L,
|
||||
};
|
||||
/* A[i] = 2^(-i/32), rounded to IEEE long double precision.
|
||||
* If i is even, A[i] + B[i/2] gives additional accuracy.
|
||||
*/
|
||||
static const long double A[33] = {
|
||||
1.0000000000000000000000E0L,
|
||||
9.7857206208770013448287E-1L,
|
||||
9.5760328069857364691013E-1L,
|
||||
9.3708381705514995065011E-1L,
|
||||
9.1700404320467123175367E-1L,
|
||||
8.9735453750155359320742E-1L,
|
||||
8.7812608018664974155474E-1L,
|
||||
8.5930964906123895780165E-1L,
|
||||
8.4089641525371454301892E-1L,
|
||||
8.2287773907698242225554E-1L,
|
||||
8.0524516597462715409607E-1L,
|
||||
7.8799042255394324325455E-1L,
|
||||
7.7110541270397041179298E-1L,
|
||||
7.5458221379671136985669E-1L,
|
||||
7.3841307296974965571198E-1L,
|
||||
7.2259040348852331001267E-1L,
|
||||
7.0710678118654752438189E-1L,
|
||||
6.9195494098191597746178E-1L,
|
||||
6.7712777346844636413344E-1L,
|
||||
6.6261832157987064729696E-1L,
|
||||
6.4841977732550483296079E-1L,
|
||||
6.3452547859586661129850E-1L,
|
||||
6.2092890603674202431705E-1L,
|
||||
6.0762367999023443907803E-1L,
|
||||
5.9460355750136053334378E-1L,
|
||||
5.8186242938878875689693E-1L,
|
||||
5.6939431737834582684856E-1L,
|
||||
5.5719337129794626814472E-1L,
|
||||
5.4525386633262882960438E-1L,
|
||||
5.3357020033841180906486E-1L,
|
||||
5.2213689121370692017331E-1L,
|
||||
5.1094857432705833910408E-1L,
|
||||
5.0000000000000000000000E-1L,
|
||||
};
|
||||
static const long double B[17] = {
|
||||
0.0000000000000000000000E0L,
|
||||
2.6176170809902549338711E-20L,
|
||||
-1.0126791927256478897086E-20L,
|
||||
1.3438228172316276937655E-21L,
|
||||
1.2207982955417546912101E-20L,
|
||||
-6.3084814358060867200133E-21L,
|
||||
1.3164426894366316434230E-20L,
|
||||
-1.8527916071632873716786E-20L,
|
||||
1.8950325588932570796551E-20L,
|
||||
1.5564775779538780478155E-20L,
|
||||
6.0859793637556860974380E-21L,
|
||||
-2.0208749253662532228949E-20L,
|
||||
1.4966292219224761844552E-20L,
|
||||
3.3540909728056476875639E-21L,
|
||||
-8.6987564101742849540743E-22L,
|
||||
-1.2327176863327626135542E-20L,
|
||||
0.0000000000000000000000E0L,
|
||||
};
|
||||
|
||||
/* 2^x = 1 + x P(x),
|
||||
* on the interval -1/32 <= x <= 0
|
||||
*/
|
||||
static const long double R[] = {
|
||||
1.5089970579127659901157E-5L,
|
||||
1.5402715328927013076125E-4L,
|
||||
1.3333556028915671091390E-3L,
|
||||
9.6181291046036762031786E-3L,
|
||||
5.5504108664798463044015E-2L,
|
||||
2.4022650695910062854352E-1L,
|
||||
6.9314718055994530931447E-1L,
|
||||
};
|
||||
|
||||
#define MEXP (NXT*16384.0L)
|
||||
/* The following if denormal numbers are supported, else -MEXP: */
|
||||
#define MNEXP (-NXT*(16384.0L+64.0L))
|
||||
/* log2(e) - 1 */
|
||||
#define LOG2EA 0.44269504088896340735992L
|
||||
|
||||
#define F W
|
||||
#define Fa Wa
|
||||
#define Fb Wb
|
||||
#define G W
|
||||
#define Ga Wa
|
||||
#define Gb u
|
||||
#define H W
|
||||
#define Ha Wb
|
||||
#define Hb Wb
|
||||
|
||||
static const long double MAXLOGL = 1.1356523406294143949492E4L;
|
||||
static const long double MINLOGL = -1.13994985314888605586758E4L;
|
||||
static const long double LOGE2L = 6.9314718055994530941723E-1L;
|
||||
static const long double huge = 0x1p10000L;
|
||||
/* XXX Prevent gcc from erroneously constant folding this. */
|
||||
static const volatile long double twom10000 = 0x1p-10000L;
|
||||
|
||||
static long double reducl(long double);
|
||||
static long double powil(long double, int);
|
||||
|
||||
long double powl(long double x, long double y)
|
||||
{
|
||||
/* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
|
||||
int i, nflg, iyflg, yoddint;
|
||||
long e;
|
||||
volatile long double z=0;
|
||||
long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0;
|
||||
|
||||
/* make sure no invalid exception is raised by nan comparision */
|
||||
if (isnan(x)) {
|
||||
if (!isnan(y) && y == 0.0)
|
||||
return 1.0;
|
||||
return x;
|
||||
}
|
||||
if (isnan(y)) {
|
||||
if (x == 1.0)
|
||||
return 1.0;
|
||||
return y;
|
||||
}
|
||||
if (x == 1.0)
|
||||
return 1.0; /* 1**y = 1, even if y is nan */
|
||||
if (x == -1.0 && !isfinite(y))
|
||||
return 1.0; /* -1**inf = 1 */
|
||||
if (y == 0.0)
|
||||
return 1.0; /* x**0 = 1, even if x is nan */
|
||||
if (y == 1.0)
|
||||
return x;
|
||||
if (y >= LDBL_MAX) {
|
||||
if (x > 1.0 || x < -1.0)
|
||||
return INFINITY;
|
||||
if (x != 0.0)
|
||||
return 0.0;
|
||||
}
|
||||
if (y <= -LDBL_MAX) {
|
||||
if (x > 1.0 || x < -1.0)
|
||||
return 0.0;
|
||||
if (x != 0.0 || y == -INFINITY)
|
||||
return INFINITY;
|
||||
}
|
||||
if (x >= LDBL_MAX) {
|
||||
if (y > 0.0)
|
||||
return INFINITY;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
w = floorl(y);
|
||||
|
||||
/* Set iyflg to 1 if y is an integer. */
|
||||
iyflg = 0;
|
||||
if (w == y)
|
||||
iyflg = 1;
|
||||
|
||||
/* Test for odd integer y. */
|
||||
yoddint = 0;
|
||||
if (iyflg) {
|
||||
ya = fabsl(y);
|
||||
ya = floorl(0.5 * ya);
|
||||
yb = 0.5 * fabsl(w);
|
||||
if( ya != yb )
|
||||
yoddint = 1;
|
||||
}
|
||||
|
||||
if (x <= -LDBL_MAX) {
|
||||
if (y > 0.0) {
|
||||
if (yoddint)
|
||||
return -INFINITY;
|
||||
return INFINITY;
|
||||
}
|
||||
if (y < 0.0) {
|
||||
if (yoddint)
|
||||
return -0.0;
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
nflg = 0; /* (x<0)**(odd int) */
|
||||
if (x <= 0.0) {
|
||||
if (x == 0.0) {
|
||||
if (y < 0.0) {
|
||||
if (signbit(x) && yoddint)
|
||||
/* (-0.0)**(-odd int) = -inf, divbyzero */
|
||||
return -1.0/0.0;
|
||||
/* (+-0.0)**(negative) = inf, divbyzero */
|
||||
return 1.0/0.0;
|
||||
}
|
||||
if (signbit(x) && yoddint)
|
||||
return -0.0;
|
||||
return 0.0;
|
||||
}
|
||||
if (iyflg == 0)
|
||||
return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
|
||||
/* (x<0)**(integer) */
|
||||
if (yoddint)
|
||||
nflg = 1; /* negate result */
|
||||
x = -x;
|
||||
}
|
||||
/* (+integer)**(integer) */
|
||||
if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
|
||||
w = powil(x, (int)y);
|
||||
return nflg ? -w : w;
|
||||
}
|
||||
|
||||
/* separate significand from exponent */
|
||||
x = frexpl(x, &i);
|
||||
e = i;
|
||||
|
||||
/* find significand in antilog table A[] */
|
||||
i = 1;
|
||||
if (x <= A[17])
|
||||
i = 17;
|
||||
if (x <= A[i+8])
|
||||
i += 8;
|
||||
if (x <= A[i+4])
|
||||
i += 4;
|
||||
if (x <= A[i+2])
|
||||
i += 2;
|
||||
if (x >= A[1])
|
||||
i = -1;
|
||||
i += 1;
|
||||
|
||||
/* Find (x - A[i])/A[i]
|
||||
* in order to compute log(x/A[i]):
|
||||
*
|
||||
* log(x) = log( a x/a ) = log(a) + log(x/a)
|
||||
*
|
||||
* log(x/a) = log(1+v), v = x/a - 1 = (x-a)/a
|
||||
*/
|
||||
x -= A[i];
|
||||
x -= B[i/2];
|
||||
x /= A[i];
|
||||
|
||||
/* rational approximation for log(1+v):
|
||||
*
|
||||
* log(1+v) = v - v**2/2 + v**3 P(v) / Q(v)
|
||||
*/
|
||||
z = x*x;
|
||||
w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3));
|
||||
w = w - 0.5*z;
|
||||
|
||||
/* Convert to base 2 logarithm:
|
||||
* multiply by log2(e) = 1 + LOG2EA
|
||||
*/
|
||||
z = LOG2EA * w;
|
||||
z += w;
|
||||
z += LOG2EA * x;
|
||||
z += x;
|
||||
|
||||
/* Compute exponent term of the base 2 logarithm. */
|
||||
w = -i;
|
||||
w /= NXT;
|
||||
w += e;
|
||||
/* Now base 2 log of x is w + z. */
|
||||
|
||||
/* Multiply base 2 log by y, in extended precision. */
|
||||
|
||||
/* separate y into large part ya
|
||||
* and small part yb less than 1/NXT
|
||||
*/
|
||||
ya = reducl(y);
|
||||
yb = y - ya;
|
||||
|
||||
/* (w+z)(ya+yb)
|
||||
* = w*ya + w*yb + z*y
|
||||
*/
|
||||
F = z * y + w * yb;
|
||||
Fa = reducl(F);
|
||||
Fb = F - Fa;
|
||||
|
||||
G = Fa + w * ya;
|
||||
Ga = reducl(G);
|
||||
Gb = G - Ga;
|
||||
|
||||
H = Fb + Gb;
|
||||
Ha = reducl(H);
|
||||
w = (Ga + Ha) * NXT;
|
||||
|
||||
/* Test the power of 2 for overflow */
|
||||
if (w > MEXP)
|
||||
return huge * huge; /* overflow */
|
||||
if (w < MNEXP)
|
||||
return twom10000 * twom10000; /* underflow */
|
||||
|
||||
e = w;
|
||||
Hb = H - Ha;
|
||||
|
||||
if (Hb > 0.0) {
|
||||
e += 1;
|
||||
Hb -= 1.0/NXT; /*0.0625L;*/
|
||||
}
|
||||
|
||||
/* Now the product y * log2(x) = Hb + e/NXT.
|
||||
*
|
||||
* Compute base 2 exponential of Hb,
|
||||
* where -0.0625 <= Hb <= 0.
|
||||
*/
|
||||
z = Hb * __polevll(Hb, R, 6); /* z = 2**Hb - 1 */
|
||||
|
||||
/* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
|
||||
* Find lookup table entry for the fractional power of 2.
|
||||
*/
|
||||
if (e < 0)
|
||||
i = 0;
|
||||
else
|
||||
i = 1;
|
||||
i = e/NXT + i;
|
||||
e = NXT*i - e;
|
||||
w = A[e];
|
||||
z = w * z; /* 2**-e * ( 1 + (2**Hb-1) ) */
|
||||
z = z + w;
|
||||
z = scalbnl(z, i); /* multiply by integer power of 2 */
|
||||
|
||||
if (nflg)
|
||||
z = -z;
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
/* Find a multiple of 1/NXT that is within 1/NXT of x. */
|
||||
static long double reducl(long double x)
|
||||
{
|
||||
long double t;
|
||||
|
||||
t = x * NXT;
|
||||
t = floorl(t);
|
||||
t = t / NXT;
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
|
||||
* Positive real raised to integer power, long double precision
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* long double x, y, powil();
|
||||
* int n;
|
||||
*
|
||||
* y = powil( x, n );
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns argument x>0 raised to the nth power.
|
||||
* The routine efficiently decomposes n as a sum of powers of
|
||||
* two. The desired power is a product of two-to-the-kth
|
||||
* powers of x. Thus to compute the 32767 power of x requires
|
||||
* 28 multiplications instead of 32767 multiplications.
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic x domain n domain # trials peak rms
|
||||
* IEEE .001,1000 -1022,1023 50000 4.3e-17 7.8e-18
|
||||
* IEEE 1,2 -1022,1023 20000 3.9e-17 7.6e-18
|
||||
* IEEE .99,1.01 0,8700 10000 3.6e-16 7.2e-17
|
||||
*
|
||||
* Returns MAXNUM on overflow, zero on underflow.
|
||||
*/
|
||||
|
||||
static long double powil(long double x, int nn)
|
||||
{
|
||||
long double ww, y;
|
||||
long double s;
|
||||
int n, e, sign, lx;
|
||||
|
||||
if (nn == 0)
|
||||
return 1.0;
|
||||
|
||||
if (nn < 0) {
|
||||
sign = -1;
|
||||
n = -nn;
|
||||
} else {
|
||||
sign = 1;
|
||||
n = nn;
|
||||
}
|
||||
|
||||
/* Overflow detection */
|
||||
|
||||
/* Calculate approximate logarithm of answer */
|
||||
s = x;
|
||||
s = frexpl( s, &lx);
|
||||
e = (lx - 1)*n;
|
||||
if ((e == 0) || (e > 64) || (e < -64)) {
|
||||
s = (s - 7.0710678118654752e-1L) / (s + 7.0710678118654752e-1L);
|
||||
s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L;
|
||||
} else {
|
||||
s = LOGE2L * e;
|
||||
}
|
||||
|
||||
if (s > MAXLOGL)
|
||||
return huge * huge; /* overflow */
|
||||
|
||||
if (s < MINLOGL)
|
||||
return twom10000 * twom10000; /* underflow */
|
||||
/* Handle tiny denormal answer, but with less accuracy
|
||||
* since roundoff error in 1.0/x will be amplified.
|
||||
* The precise demarcation should be the gradual underflow threshold.
|
||||
*/
|
||||
if (s < -MAXLOGL+2.0) {
|
||||
x = 1.0/x;
|
||||
sign = -sign;
|
||||
}
|
||||
|
||||
/* First bit of the power */
|
||||
if (n & 1)
|
||||
y = x;
|
||||
else
|
||||
y = 1.0;
|
||||
|
||||
ww = x;
|
||||
n >>= 1;
|
||||
while (n) {
|
||||
ww = ww * ww; /* arg to the 2-to-the-kth power */
|
||||
if (n & 1) /* if that bit is set, then include in product */
|
||||
y *= ww;
|
||||
n >>= 1;
|
||||
}
|
||||
|
||||
if (sign < 0)
|
||||
y = 1.0/y;
|
||||
return y;
|
||||
}
|
||||
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
|
||||
// TODO: broken implementation to make things compile
|
||||
long double powl(long double x, long double y)
|
||||
{
|
||||
return pow(x, y);
|
||||
}
|
||||
#endif
|
||||
11
be/src/glibc-compatibility/musl/pwritev.c
Normal file
11
be/src/glibc-compatibility/musl/pwritev.c
Normal file
@ -0,0 +1,11 @@
|
||||
#define _DEFAULT_SOURCE
|
||||
#include <sys/uio.h>
|
||||
#include <unistd.h>
|
||||
#include <syscall.h>
|
||||
#include "syscall.h"
|
||||
|
||||
ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
|
||||
{
|
||||
/// There was cancellable syscall (syscall_cp), but I don't care.
|
||||
return syscall(SYS_pwritev, fd, iov, count, (long)(ofs), (long)(ofs>>32));
|
||||
}
|
||||
11
be/src/glibc-compatibility/musl/sched_cpucount.c
Normal file
11
be/src/glibc-compatibility/musl/sched_cpucount.c
Normal file
@ -0,0 +1,11 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
|
||||
int __sched_cpucount(size_t size, const cpu_set_t *set)
|
||||
{
|
||||
size_t i, j, cnt=0;
|
||||
const unsigned char *p = (const void *)set;
|
||||
for (i=0; i<size; i++) for (j=0; j<8; j++)
|
||||
if (p[i] & (1<<j)) cnt++;
|
||||
return cnt;
|
||||
}
|
||||
42
be/src/glibc-compatibility/musl/sched_getcpu.c
Normal file
42
be/src/glibc-compatibility/musl/sched_getcpu.c
Normal file
@ -0,0 +1,42 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include "syscall.h"
|
||||
#include "atomic.h"
|
||||
|
||||
#ifdef VDSO_GETCPU_SYM
|
||||
|
||||
static void *volatile vdso_func;
|
||||
|
||||
typedef long (*getcpu_f)(unsigned *, unsigned *, void *);
|
||||
|
||||
static long getcpu_init(unsigned *cpu, unsigned *node, void *unused)
|
||||
{
|
||||
void *p = __vdsosym(VDSO_GETCPU_VER, VDSO_GETCPU_SYM);
|
||||
getcpu_f f = (getcpu_f)p;
|
||||
a_cas_p(&vdso_func, (void *)getcpu_init, p);
|
||||
return f ? f(cpu, node, unused) : -ENOSYS;
|
||||
}
|
||||
|
||||
static void *volatile vdso_func = (void *)getcpu_init;
|
||||
|
||||
#endif
|
||||
|
||||
int sched_getcpu(void)
|
||||
{
|
||||
int r;
|
||||
unsigned cpu;
|
||||
|
||||
#ifdef VDSO_GETCPU_SYM
|
||||
getcpu_f f = (getcpu_f)vdso_func;
|
||||
if (f) {
|
||||
r = f(&cpu, 0, 0);
|
||||
if (!r) return cpu;
|
||||
if (r != -ENOSYS) return __syscall_ret(r);
|
||||
}
|
||||
#endif
|
||||
|
||||
r = __syscall(SYS_getcpu, &cpu, 0, 0);
|
||||
if (!r) return cpu;
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
11
be/src/glibc-compatibility/musl/secure_env.c
Normal file
11
be/src/glibc-compatibility/musl/secure_env.c
Normal file
@ -0,0 +1,11 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
|
||||
char *getenv(const char *name);
|
||||
|
||||
char *secure_getenv(const char *name)
|
||||
{
|
||||
// NOTE: Let's assume we're not running as SUID
|
||||
return getenv(name);
|
||||
/* return libc.secure ? NULL : getenv(name); */
|
||||
}
|
||||
15
be/src/glibc-compatibility/musl/syscall.h
Normal file
15
be/src/glibc-compatibility/musl/syscall.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <syscall_arch.h>
|
||||
|
||||
typedef long syscall_arg_t;
|
||||
|
||||
__attribute__((visibility("hidden")))
|
||||
long __syscall_ret(unsigned long);
|
||||
|
||||
__attribute__((visibility("hidden")))
|
||||
long __syscall(syscall_arg_t, ...);
|
||||
|
||||
__attribute__((visibility("hidden")))
|
||||
void *__vdsosym(const char *, const char *);
|
||||
11
be/src/glibc-compatibility/musl/syscall_ret.c
Normal file
11
be/src/glibc-compatibility/musl/syscall_ret.c
Normal file
@ -0,0 +1,11 @@
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
|
||||
long __syscall_ret(unsigned long r)
|
||||
{
|
||||
if (r > -4096UL) {
|
||||
errno = -r;
|
||||
return -1;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
12
be/src/glibc-compatibility/musl/timespec_get.c
Normal file
12
be/src/glibc-compatibility/musl/timespec_get.c
Normal file
@ -0,0 +1,12 @@
|
||||
#include <time.h>
|
||||
|
||||
int clock_gettime(clockid_t, struct timespec *);
|
||||
|
||||
/* There is no other implemented value than TIME_UTC; all other values
|
||||
* are considered erroneous. */
|
||||
int timespec_get(struct timespec * ts, int base)
|
||||
{
|
||||
if (base != TIME_UTC) return 0;
|
||||
int ret = clock_gettime(CLOCK_REALTIME, ts);
|
||||
return ret < 0 ? 0 : base;
|
||||
}
|
||||
38
be/src/glibc-compatibility/musl/utimensat.c
Normal file
38
be/src/glibc-compatibility/musl/utimensat.c
Normal file
@ -0,0 +1,38 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include "syscall.h"
|
||||
#include <syscall.h>
|
||||
|
||||
int utimensat(int fd, const char *path, const struct timespec times[2], int flags)
|
||||
{
|
||||
int r = __syscall(SYS_utimensat, fd, path, times, flags);
|
||||
#ifdef SYS_futimesat
|
||||
if (r != -ENOSYS || flags) return __syscall_ret(r);
|
||||
struct timeval *tv = 0, tmp[2];
|
||||
if (times) {
|
||||
int i;
|
||||
tv = tmp;
|
||||
for (i=0; i<2; i++) {
|
||||
if (times[i].tv_nsec >= 1000000000ULL) {
|
||||
if (times[i].tv_nsec == UTIME_NOW &&
|
||||
times[1-i].tv_nsec == UTIME_NOW) {
|
||||
tv = 0;
|
||||
break;
|
||||
}
|
||||
if (times[i].tv_nsec == UTIME_OMIT)
|
||||
return __syscall_ret(-ENOSYS);
|
||||
return __syscall_ret(-EINVAL);
|
||||
}
|
||||
tmp[i].tv_sec = times[i].tv_sec;
|
||||
tmp[i].tv_usec = times[i].tv_nsec / 1000;
|
||||
}
|
||||
}
|
||||
|
||||
r = __syscall(SYS_futimesat, fd, path, tv);
|
||||
if (r != -ENOSYS || fd != AT_FDCWD) return __syscall_ret(r);
|
||||
r = __syscall(SYS_utimes, path, tv);
|
||||
#endif
|
||||
return __syscall_ret(r);
|
||||
}
|
||||
15
be/src/glibc-compatibility/musl/vasprintf.c
Normal file
15
be/src/glibc-compatibility/musl/vasprintf.c
Normal file
@ -0,0 +1,15 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int vasprintf(char **s, const char *fmt, va_list ap)
|
||||
{
|
||||
va_list ap2;
|
||||
va_copy(ap2, ap);
|
||||
int l = vsnprintf(0, 0, fmt, ap2);
|
||||
va_end(ap2);
|
||||
|
||||
if (l<0 || !(*s=malloc(l+1U))) return -1;
|
||||
return vsnprintf(*s, l+1U, fmt, ap);
|
||||
}
|
||||
105
be/src/glibc-compatibility/musl/vdso.c
Normal file
105
be/src/glibc-compatibility/musl/vdso.c
Normal file
@ -0,0 +1,105 @@
|
||||
#include <elf.h>
|
||||
#include <link.h>
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/auxv.h>
|
||||
#include "syscall.h"
|
||||
|
||||
#ifdef VDSO_USEFUL
|
||||
|
||||
#if ULONG_MAX == 0xffffffff
|
||||
typedef Elf32_Ehdr Ehdr;
|
||||
typedef Elf32_Phdr Phdr;
|
||||
typedef Elf32_Sym Sym;
|
||||
typedef Elf32_Verdef Verdef;
|
||||
typedef Elf32_Verdaux Verdaux;
|
||||
#else
|
||||
typedef Elf64_Ehdr Ehdr;
|
||||
typedef Elf64_Phdr Phdr;
|
||||
typedef Elf64_Sym Sym;
|
||||
typedef Elf64_Verdef Verdef;
|
||||
typedef Elf64_Verdaux Verdaux;
|
||||
#endif
|
||||
|
||||
static int checkver(Verdef *def, int vsym, const char *vername, char *strings)
|
||||
{
|
||||
vsym &= 0x7fff;
|
||||
for (;;) {
|
||||
if (!(def->vd_flags & VER_FLG_BASE)
|
||||
&& (def->vd_ndx & 0x7fff) == vsym)
|
||||
break;
|
||||
if (def->vd_next == 0)
|
||||
return 0;
|
||||
def = (Verdef *)((char *)def + def->vd_next);
|
||||
}
|
||||
Verdaux *aux = (Verdaux *)((char *)def + def->vd_aux);
|
||||
return !strcmp(vername, strings + aux->vda_name);
|
||||
}
|
||||
|
||||
#define OK_TYPES (1<<STT_NOTYPE | 1<<STT_OBJECT | 1<<STT_FUNC | 1<<STT_COMMON)
|
||||
#define OK_BINDS (1<<STB_GLOBAL | 1<<STB_WEAK | 1<<STB_GNU_UNIQUE)
|
||||
|
||||
extern char** environ;
|
||||
static Ehdr *eh = NULL;
|
||||
void *__vdsosym(const char *vername, const char *name);
|
||||
// We don't have libc struct available here. Compute aux vector manually.
|
||||
__attribute__((constructor)) static void auxv_init()
|
||||
{
|
||||
size_t i, *auxv;
|
||||
for (i=0; environ[i]; i++);
|
||||
auxv = (void *)(environ+i+1);
|
||||
for (i=0; auxv[i] != AT_SYSINFO_EHDR; i+=2)
|
||||
if (!auxv[i]) return;
|
||||
if (!auxv[i+1]) return;
|
||||
eh = (void *)auxv[i+1];
|
||||
}
|
||||
|
||||
void *__vdsosym(const char *vername, const char *name)
|
||||
{
|
||||
size_t i;
|
||||
if (!eh) return 0;
|
||||
Phdr *ph = (void *)((char *)eh + eh->e_phoff);
|
||||
size_t *dynv=0, base=-1;
|
||||
for (i=0; i<eh->e_phnum; i++, ph=(void *)((char *)ph+eh->e_phentsize)) {
|
||||
if (ph->p_type == PT_LOAD)
|
||||
base = (size_t)eh + ph->p_offset - ph->p_vaddr;
|
||||
else if (ph->p_type == PT_DYNAMIC)
|
||||
dynv = (void *)((char *)eh + ph->p_offset);
|
||||
}
|
||||
if (!dynv || base==(size_t)-1) return 0;
|
||||
|
||||
char *strings = 0;
|
||||
Sym *syms = 0;
|
||||
Elf_Symndx *hashtab = 0;
|
||||
uint16_t *versym = 0;
|
||||
Verdef *verdef = 0;
|
||||
|
||||
for (i=0; dynv[i]; i+=2) {
|
||||
void *p = (void *)(base + dynv[i+1]);
|
||||
switch(dynv[i]) {
|
||||
case DT_STRTAB: strings = p; break;
|
||||
case DT_SYMTAB: syms = p; break;
|
||||
case DT_HASH: hashtab = p; break;
|
||||
case DT_VERSYM: versym = p; break;
|
||||
case DT_VERDEF: verdef = p; break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!strings || !syms || !hashtab) return 0;
|
||||
if (!verdef) versym = 0;
|
||||
|
||||
for (i=0; i<hashtab[1]; i++) {
|
||||
if (!(1<<(syms[i].st_info&0xf) & OK_TYPES)) continue;
|
||||
if (!(1<<(syms[i].st_info>>4) & OK_BINDS)) continue;
|
||||
if (!syms[i].st_shndx) continue;
|
||||
if (strcmp(name, strings+syms[i].st_name)) continue;
|
||||
if (versym && !checkver(verdef, versym[i], vername, strings))
|
||||
continue;
|
||||
return (void *)(base + syms[i].st_value);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
125
be/src/glibc-compatibility/musl/x86_64/atomic_arch.h
Normal file
125
be/src/glibc-compatibility/musl/x86_64/atomic_arch.h
Normal file
@ -0,0 +1,125 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#define a_cas a_cas
|
||||
static inline int a_cas(volatile int *p, int t, int s)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"lock ; cmpxchg %3, %1"
|
||||
: "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
|
||||
return t;
|
||||
}
|
||||
|
||||
#define a_cas_p a_cas_p
|
||||
static inline void *a_cas_p(volatile void *p, void *t, void *s)
|
||||
{
|
||||
__asm__( "lock ; cmpxchg %3, %1"
|
||||
: "=a"(t), "=m"(*(void *volatile *)p)
|
||||
: "a"(t), "r"(s) : "memory" );
|
||||
return t;
|
||||
}
|
||||
|
||||
#define a_swap a_swap
|
||||
static inline int a_swap(volatile int *p, int v)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"xchg %0, %1"
|
||||
: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
|
||||
return v;
|
||||
}
|
||||
|
||||
#define a_fetch_add a_fetch_add
|
||||
static inline int a_fetch_add(volatile int *p, int v)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"lock ; xadd %0, %1"
|
||||
: "=r"(v), "=m"(*p) : "0"(v) : "memory" );
|
||||
return v;
|
||||
}
|
||||
|
||||
#define a_and a_and
|
||||
static inline void a_and(volatile int *p, int v)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"lock ; and %1, %0"
|
||||
: "=m"(*p) : "r"(v) : "memory" );
|
||||
}
|
||||
|
||||
#define a_or a_or
|
||||
static inline void a_or(volatile int *p, int v)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"lock ; or %1, %0"
|
||||
: "=m"(*p) : "r"(v) : "memory" );
|
||||
}
|
||||
|
||||
#define a_and_64 a_and_64
|
||||
static inline void a_and_64(volatile uint64_t *p, uint64_t v)
|
||||
{
|
||||
__asm__ __volatile(
|
||||
"lock ; and %1, %0"
|
||||
: "=m"(*p) : "r"(v) : "memory" );
|
||||
}
|
||||
|
||||
#define a_or_64 a_or_64
|
||||
static inline void a_or_64(volatile uint64_t *p, uint64_t v)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"lock ; or %1, %0"
|
||||
: "=m"(*p) : "r"(v) : "memory" );
|
||||
}
|
||||
|
||||
#define a_inc a_inc
|
||||
static inline void a_inc(volatile int *p)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"lock ; incl %0"
|
||||
: "=m"(*p) : "m"(*p) : "memory" );
|
||||
}
|
||||
|
||||
#define a_dec a_dec
|
||||
static inline void a_dec(volatile int *p)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"lock ; decl %0"
|
||||
: "=m"(*p) : "m"(*p) : "memory" );
|
||||
}
|
||||
|
||||
#define a_store a_store
|
||||
static inline void a_store(volatile int *p, int x)
|
||||
{
|
||||
__asm__ __volatile__(
|
||||
"mov %1, %0 ; lock ; orl $0,(%%rsp)"
|
||||
: "=m"(*p) : "r"(x) : "memory" );
|
||||
}
|
||||
|
||||
#define a_barrier a_barrier
|
||||
static inline void a_barrier()
|
||||
{
|
||||
__asm__ __volatile__( "" : : : "memory" );
|
||||
}
|
||||
|
||||
#define a_spin a_spin
|
||||
static inline void a_spin()
|
||||
{
|
||||
__asm__ __volatile__( "pause" : : : "memory" );
|
||||
}
|
||||
|
||||
#define a_crash a_crash
|
||||
static inline void a_crash()
|
||||
{
|
||||
__asm__ __volatile__( "hlt" : : : "memory" );
|
||||
}
|
||||
|
||||
#define a_ctz_64 a_ctz_64
|
||||
static inline int a_ctz_64(uint64_t x)
|
||||
{
|
||||
__asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
|
||||
return x;
|
||||
}
|
||||
|
||||
#define a_clz_64 a_clz_64
|
||||
static inline int a_clz_64(uint64_t x)
|
||||
{
|
||||
__asm__( "bsr %1,%0 ; xor $63,%0" : "=r"(x) : "r"(x) );
|
||||
return x;
|
||||
}
|
||||
22
be/src/glibc-compatibility/musl/x86_64/longjmp.s
Normal file
22
be/src/glibc-compatibility/musl/x86_64/longjmp.s
Normal file
@ -0,0 +1,22 @@
|
||||
/* Copyright 2011-2012 Nicholas J. Kain, licensed under standard MIT license */
|
||||
.global musl_glibc_longjmp
|
||||
.type musl_glibc_longjmp,@function
|
||||
musl_glibc_longjmp:
|
||||
mov 0x30(%rdi),%r8
|
||||
mov 0x8(%rdi),%r9
|
||||
mov 0x38(%rdi),%rdx
|
||||
ror $0x11,%r8
|
||||
xor %fs:0x30,%r8 /* this ends up being the stack pointer */
|
||||
ror $0x11,%r9
|
||||
xor %fs:0x30,%r9
|
||||
ror $0x11,%rdx
|
||||
xor %fs:0x30,%rdx /* this is the instruction pointer */
|
||||
mov (%rdi),%rbx /* rdi is the jmp_buf, restore regs from it */
|
||||
mov 0x10(%rdi),%r12
|
||||
mov 0x18(%rdi),%r13
|
||||
mov 0x20(%rdi),%r14
|
||||
mov 0x28(%rdi),%r15
|
||||
mov %esi,%eax
|
||||
mov %r8,%rsp
|
||||
mov %r9,%rbp
|
||||
jmpq *%rdx /* goto saved address without altering rsp */
|
||||
15
be/src/glibc-compatibility/musl/x86_64/syscall.s
Normal file
15
be/src/glibc-compatibility/musl/x86_64/syscall.s
Normal file
@ -0,0 +1,15 @@
|
||||
.global __syscall
|
||||
.hidden __syscall
|
||||
.type __syscall,@function
|
||||
__syscall:
|
||||
.cfi_startproc
|
||||
movq %rdi,%rax
|
||||
movq %rsi,%rdi
|
||||
movq %rdx,%rsi
|
||||
movq %rcx,%rdx
|
||||
movq %r8,%r10
|
||||
movq %r9,%r8
|
||||
movq 8(%rsp),%r9
|
||||
syscall
|
||||
ret
|
||||
.cfi_endproc
|
||||
5
be/src/glibc-compatibility/musl/x86_64/syscall_arch.h
Normal file
5
be/src/glibc-compatibility/musl/x86_64/syscall_arch.h
Normal file
@ -0,0 +1,5 @@
|
||||
#define VDSO_USEFUL
|
||||
#define VDSO_CGT_SYM "__vdso_clock_gettime"
|
||||
#define VDSO_CGT_VER "LINUX_2.6"
|
||||
#define VDSO_GETCPU_SYM "__vdso_getcpu"
|
||||
#define VDSO_GETCPU_VER "LINUX_2.6"
|
||||
4
thirdparty/build-thirdparty.sh
vendored
4
thirdparty/build-thirdparty.sh
vendored
@ -398,7 +398,7 @@ build_curl() {
|
||||
LDFLAGS="-L${TP_LIB_DIR}" LIBS="-lcrypto -lssl -lcrypto -ldl" \
|
||||
CFLAGS="-fPIC" \
|
||||
./configure --prefix=$TP_INSTALL_DIR --disable-shared --enable-static \
|
||||
--without-librtmp --with-ssl=${TP_INSTALL_DIR} --without-libidn2 --disable-ldap --enable-ipv6
|
||||
--without-librtmp --without-libssh2 --without-nghttp2 --with-ssl=${TP_INSTALL_DIR} --without-libidn2 --disable-ldap --enable-ipv6
|
||||
make -j$PARALLEL && make install
|
||||
}
|
||||
|
||||
@ -822,4 +822,4 @@ build_aws_c_event_stream
|
||||
build_aws_sdk
|
||||
build_js_and_css
|
||||
|
||||
echo "Finihsed to build all thirdparties"
|
||||
echo "Finished to build all thirdparties"
|
||||
|
||||
Reference in New Issue
Block a user