TurboPFor: Bit bitutil

This commit is contained in:
x
2023-06-09 14:24:46 +02:00
parent 41a45792f8
commit 933a99f841

34
lib/include_/bitutil.h Normal file → Executable file
View File

@ -143,8 +143,6 @@ void bitzdec(unsigned char *in, unsigned n, unsigned esize);
// Affine float <-> integer mapping helpers.
// FLOAT2INT: computes fval * scale + bias and rounds it with round() (from <math.h>);
//            the expansion has the type returned by round() (double), the caller casts as needed.
// INT2FLOAT: inverse mapping, (ival - bias) / scale, evaluated in the usual arithmetic
//            conversions of its arguments (pass a floating point scale to avoid integer division).
// Every argument is parenthesized so that expressions such as "a + b" can be passed safely.
// Note: each argument is evaluated exactly once, but it is still a macro: no type checking.
#define FLOAT2INT(fval, scale, bias) round((fval) * (scale) + (bias))
#define INT2FLOAT(ival, scale, bias) (((ival) - (bias)) / (scale))
//------- Lossy floating point transform: pad the trailing mantissa bits with zeros according to the error e (ex. e=0.00001)
// must include float.h to use _Float16 (see icapp.c)
#if defined(__clang__) && defined(__is_identifier)
#if !__is_identifier(_Float16)
#undef FLT16_BUILTIN
@ -152,6 +150,38 @@ void bitzdec(unsigned char *in, unsigned n, unsigned esize);
#elif defined(FLT16_MAX)
#define FLT16_BUILTIN
#endif
//------- Quantization : b number of bits quantized in out ----------------
// Floating point -> integer direction.
// Naming: fpquant<W>e<F> reads <F>-bit floating point values (16:_Float16, 32:float, 64:double)
//         from 'in' and writes uint<W>_t values to 'out'.
//   in           : input floating point array
//   n            : presumably the number of elements in 'in' (and 'out') - confirm against bitutil.c
//   out          : output integer array
//   b            : number of bits quantized in out (see the heading above)
//   pfmin, pfmax : passed by pointer; presumably receive the minimum/maximum of the input,
//                  to be handed to the matching fpquant<W>d<F> function - TODO confirm
// NOTE(review): the 'e' suffix presumably stands for "encode"; the definitions are not visible here.
// The _Float16 variants are only declared when FLT16_BUILTIN is defined.
#if defined(FLT16_BUILTIN)
void fpquant8e16( _Float16 *in, size_t n, uint8_t *out, unsigned b, _Float16 *pfmin, _Float16 *pfmax);
void fpquant16e16(_Float16 *in, size_t n, uint16_t *out, unsigned b, _Float16 *pfmin, _Float16 *pfmax);
#endif
void fpquant8e32( float *in, size_t n, uint8_t *out, unsigned b, float *pfmin, float *pfmax);
void fpquant16e32( float *in, size_t n, uint16_t *out, unsigned b, float *pfmin, float *pfmax);
void fpquant32e32( float *in, size_t n, uint32_t *out, unsigned b, float *pfmin, float *pfmax);
void fpquant8e64( double *in, size_t n, uint8_t *out, unsigned b, double *pfmin, double *pfmax);
void fpquant16e64( double *in, size_t n, uint16_t *out, unsigned b, double *pfmin, double *pfmax);
void fpquant32e64( double *in, size_t n, uint32_t *out, unsigned b, double *pfmin, double *pfmax);
void fpquant64e64( double *in, size_t n, uint64_t *out, unsigned b, double *pfmin, double *pfmax);
// Integer -> floating point direction.
// Naming: fpquant<W>d<F> reads uint<W>_t values from 'in' and writes <F>-bit floating point
//         values (16:_Float16, 32:float, 64:double) to 'out'.
//   in         : input integer array
//   n          : presumably the number of elements in 'in' (and 'out') - confirm against bitutil.c
//   out        : output floating point array
//   b          : number of bits per quantized value; presumably must match the value used
//                when the integers were produced - TODO confirm
//   fmin, fmax : passed by value; presumably the range obtained through pfmin/pfmax of the
//                matching fpquant<W>e<F> function - TODO confirm
// NOTE(review): the 'd' suffix presumably stands for "decode"; the definitions are not visible here.
// The _Float16 variants are only declared when FLT16_BUILTIN is defined.
#if defined(FLT16_BUILTIN)
void fpquant8d16( uint8_t *in, size_t n, _Float16 *out, unsigned b, _Float16 fmin, _Float16 fmax);
void fpquant16d16(uint16_t *in, size_t n, _Float16 *out, unsigned b, _Float16 fmin, _Float16 fmax);
#endif
void fpquant8d32( uint8_t *in, size_t n, float *out, unsigned b, float fmin, float fmax);
void fpquant16d32(uint16_t *in, size_t n, float *out, unsigned b, float fmin, float fmax);
void fpquant32d32(uint32_t *in, size_t n, float *out, unsigned b, float fmin, float fmax);
void fpquant8d64( uint8_t *in, size_t n, double *out, unsigned b, double fmin, double fmax);
void fpquant16d64(uint16_t *in, size_t n, double *out, unsigned b, double fmin, double fmax);
void fpquant32d64(uint32_t *in, size_t n, double *out, unsigned b, double fmin, double fmax);
void fpquant64d64(uint64_t *in, size_t n, double *out, unsigned b, double fmin, double fmax);
//------- Lossy floating point transform: pad the trailing mantissa bits with zeros according to the error e (ex. e=0.00001)
// must include float.h to use _Float16 (see icapp.c)
#ifdef FLT16_BUILTIN
_Float16 _fprazor16(_Float16 d, float e, int lg2e);
void fprazor16(_Float16 *in, unsigned n, _Float16 *out, float e);