From 933a99f8419eda8db1a0e0ecfa3c271be955d765 Mon Sep 17 00:00:00 2001 From: x Date: Fri, 9 Jun 2023 14:24:46 +0200 Subject: [PATCH] TurboPFor: Bit bitutil --- lib/include_/bitutil.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) mode change 100644 => 100755 lib/include_/bitutil.h diff --git a/lib/include_/bitutil.h b/lib/include_/bitutil.h old mode 100644 new mode 100755 index f5664bd..3d791b7 --- a/lib/include_/bitutil.h +++ b/lib/include_/bitutil.h @@ -143,8 +143,6 @@ void bitzdec(unsigned char *in, unsigned n, unsigned esize); #define FLOAT2INT(fval, scalse, bias) round(fval * scale + bias) #define INT2FLOAT(ival, scalse, bias) ((ival - bias) / scale) -//------- Lossy floating point transform: pad the trailing mantissa bits with zeros according to the error e (ex. e=0.00001) -// must include float.h to use _Float16 (see icapp.c) #if defined(__clang__) && defined(__is_identifier) #if !__is_identifier(_Float16) #undef FLT16_BUILTIN @@ -152,6 +150,38 @@ void bitzdec(unsigned char *in, unsigned n, unsigned esize); #elif defined(FLT16_MAX) #define FLT16_BUILTIN #endif +//------- Quantization : b number of bits quantized in out ---------------- + #if defined(FLT16_BUILTIN) +void fpquant8e16( _Float16 *in, size_t n, uint8_t *out, unsigned b, _Float16 *pfmin, _Float16 *pfmax); +void fpquant16e16(_Float16 *in, size_t n, uint16_t *out, unsigned b, _Float16 *pfmin, _Float16 *pfmax); + #endif + +void fpquant8e32( float *in, size_t n, uint8_t *out, unsigned b, float *pfmin, float *pfmax); +void fpquant16e32( float *in, size_t n, uint16_t *out, unsigned b, float *pfmin, float *pfmax); +void fpquant32e32( float *in, size_t n, uint32_t *out, unsigned b, float *pfmin, float *pfmax); + +void fpquant8e64( double *in, size_t n, uint8_t *out, unsigned b, double *pfmin, double *pfmax); +void fpquant16e64( double *in, size_t n, uint16_t *out, unsigned b, double *pfmin, double *pfmax); +void fpquant32e64( double *in, size_t n, uint32_t *out, 
unsigned b, double *pfmin, double *pfmax); +void fpquant64e64( double *in, size_t n, uint64_t *out, unsigned b, double *pfmin, double *pfmax); + + #if defined(FLT16_BUILTIN) +void fpquant8d16( uint8_t *in, size_t n, _Float16 *out, unsigned b, _Float16 fmin, _Float16 fmax); +void fpquant16d16(uint16_t *in, size_t n, _Float16 *out, unsigned b, _Float16 fmin, _Float16 fmax); + #endif + +void fpquant8d32( uint8_t *in, size_t n, float *out, unsigned b, float fmin, float fmax); +void fpquant16d32(uint16_t *in, size_t n, float *out, unsigned b, float fmin, float fmax); +void fpquant32d32(uint32_t *in, size_t n, float *out, unsigned b, float fmin, float fmax); + +void fpquant8d64( uint8_t *in, size_t n, double *out, unsigned b, double fmin, double fmax); +void fpquant16d64(uint16_t *in, size_t n, double *out, unsigned b, double fmin, double fmax); +void fpquant32d64(uint32_t *in, size_t n, double *out, unsigned b, double fmin, double fmax); +void fpquant64d64(uint64_t *in, size_t n, double *out, unsigned b, double fmin, double fmax); + + +//------- Lossy floating point transform: pad the trailing mantissa bits with zeros according to the error e (ex. e=0.00001) +// must include float.h to use _Float16 (see icapp.c) #ifdef FLT16_BUILTIN _Float16 _fprazor16(_Float16 d, float e, int lg2e); void fprazor16(_Float16 *in, unsigned n, _Float16 *out, float e);