TurboPFor: Config/Platform

This commit is contained in:
powturbo
2021-11-11 16:13:51 +01:00
parent 3ab6ca6163
commit 7a6a474030

43
conf.h
View File

@ -39,11 +39,12 @@
#define popcnt64(_x_) __builtin_popcountll(_x_)
#if defined(__i386__) || defined(__x86_64__)
//__bsr32: 1:0,2:1,3:1,4:2,5:2,6:2,7:2,8:3,9:3,10:3,11:3,12:3,13:3,14:3,15:3,16:4,17:4,18:4,19:4,20:4,21:4,22:4,23:4,24:4,25:4,26:4,27:4,28:4,29:4,30:4,31:4,32:5
// bsr32: 0:0,1:1,2:2,3:2,4:3,5:3,6:3,7:3,8:4,9:4,10:4,11:4,12:4,13:4,14:4,15:4,16:5,17:5,18:5,19:5,20:5,21:5,22:5,23:5,24:5,25:5,26:5,27:5,28:5,29:5,30:5,31:5,32:6,
//x,__bsr32: 1:0,2:1,3:1,4:2,5:2,6:2,7:2,8:3,9:3,10:3,11:3,12:3,13:3,14:3,15:3,16:4,17:4,18:4,19:4,20:4,21:4,22:4,23:4,24:4,25:4,26:4,27:4,28:4,29:4,30:4,31:4,32:5
// x,bsr32: 0:0,1:1,2:2,3:2,4:3,5:3,6:3,7:3,8:4,9:4,10:4,11:4,12:4,13:4,14:4,15:4,16:5,17:5,18:5,19:5,20:5,21:5,22:5,23:5,24:5,25:5,26:5,27:5,28:5,29:5,30:5,31:5,32:6,
static inline int __bsr32( int x) { asm("bsr %1,%0" : "=r" (x) : "rm" (x) ); return x; }
static inline int bsr32( int x) { int b = -1; asm("bsrl %1,%0" : "+r" (b) : "rm" (x) ); return b + 1; }
static inline int bsr64(uint64_t x) { return x?64 - __builtin_clzll(x):0; }
static inline int bsr64(uint64_t x ) { return x?64 - __builtin_clzll(x):0; }
static inline int __bsr64(uint64_t x ) { return 63 - __builtin_clzll(x); }
static inline unsigned rol32(unsigned x, int s) { asm ("roll %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
static inline unsigned ror32(unsigned x, int s) { asm ("rorl %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
@ -63,7 +64,7 @@ static inline unsigned ror64(unsigned x, int s) { return x >> s | x << (64 - s);
#define ctz64(_x_) __builtin_ctzll(_x_)
#define ctz32(_x_) __builtin_ctz(_x_) // 0:32 ctz32(1<<a) = a (a=1..31)
#define clz64(_x_) __builtin_clzll(_x_)
#define clz32(_x_) __builtin_clz(_x_)
#define clz32(_x_) __builtin_clz(_x_) // 00000000 00000000 00000000 01000000 = 25
//#define bswap8(x) (x)
#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
@ -131,8 +132,11 @@ static inline int clz64(uint64_t x) { unsigned long z; _BitScanReverse64(&z, x
#define strcasecmp _stricmp
#define strncasecmp _strnicmp
#define strtoull _strtoui64
static inline double round(double num) { return (num > 0.0) ? floor(num + 0.5) : ceil(num - 0.5); }
#endif
#define __bsr8(_x_) __bsr32(_x_)
#define __bsr16(_x_) __bsr32(_x_)
#define bsr8(_x_) bsr32(_x_)
#define bsr16(_x_) bsr32(_x_)
#define ctz8(_x_) ctz32(_x_)
@ -214,10 +218,30 @@ struct _PACKED doubleu { double d; };
#endif
//---------------------misc ---------------------------------------------------
#define BZHI64(_u_, _b_) ((_u_) & ((1ull<<(_b_))-1))
#define BZHI32(_u_, _b_) ((_u_) & ((1u <<(_b_))-1))
#define BZHI16(_u_, _b_) BZHI32(_u_, _b_)
#define BZHI8(_u_, _b_) BZHI32(_u_, _b_)
#define BZHI64F(_u_, _b_) ((_u_) & ((1ull<<(_b_))-1)) // _b_ < 64
#define BZHI32F(_u_, _b_) ((_u_) & ((1u <<(_b_))-1)) // _b_ < 32
#define BZHI64( _u_, _b_) (_b_ == 64?0xffffffffffffffffull:((_u_) & ((1ull<<(_b_))-1))) // Constant
#define BZHI32( _u_, _b_) (_b_ == 32? 0xffffffffu :((_u_) & ((1u <<(_b_))-1)))
#define BZHI16( _u_, _b_) BZHI32(_u_, _b_)
#define BZHI8( _u_, _b_) BZHI32(_u_, _b_)
#ifdef __AVX2__
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#define bzhi32(_u_, _b_) _bzhi_u32(_u_, _b_)
#if !(defined(_M_X64) || defined(__amd64__)) && (defined(__i386__) || defined(_M_IX86))
#define bzhi64(_u_, _b_) ((_u_) & ((1ull<<(_b_))-1))
#else
#define bzhi64(_u_, _b_) _bzhi_u64(_u_, _b_)
#endif
#else
#define bzhi_u64(_u_, _b_) BZHI64(_u_, _b_)
#define bzhi_u32(_u_, _b_) BZHI32(_u_, _b_)
#endif
#define SIZE_ROUNDUP(_n_, _a_) (((size_t)(_n_) + (size_t)((_a_) - 1)) & ~(size_t)((_a_) - 1))
#define ALIGN_DOWN(__ptr, __a) ((void *)((uintptr_t)(__ptr) & ~(uintptr_t)((__a) - 1)))
@ -230,6 +254,9 @@ struct _PACKED doubleu { double d; };
#define CACHE_LINE_SIZE 64
#define PREFETCH_DISTANCE (CACHE_LINE_SIZE*4)
#define CLAMP(_x_, _low_, _high_) (((_x_) > (_high_)) ? (_high_) : (((_x_) < (_low_)) ? (_low_) : (_x_)))
//--- NDEBUG -------
#include <stdio.h>
#ifdef _MSC_VER