diff --git a/lib/bitutil.c b/lib/bitutil.c
old mode 100644
new mode 100755
index dac82a1..0721544
--- a/lib/bitutil.c
+++ b/lib/bitutil.c
@@ -877,6 +877,66 @@ void bitzdec(unsigned char *in, unsigned n, unsigned esize) {
   }
 }
 
+//----------- Quantization -----------------------------------
+// Linear quantization of floating point arrays: the encoder maps the input range [min,max] onto
+// the integer levels 0..BZMASK32(b), the decoder maps the levels back to fmin + level*step.
+// fpquant<W>e<F> encodes F-bit floats into W-bit integers, fpquant<W>d<F> decodes them.
+#define ROUND16(x) roundf(x)
+#define ROUND32(x) roundf(x)
+#define ROUND64(x) round(x)
+
+// Level of _x_: (_x_ - _fmin_)*_delta_ rounded with ROUND<t_s> (t_s = 16, 32 or 64)
+#define _FPQUANTE(t_s, _x_, _fmin_, _delta_) T2(ROUND,t_s)(((_x_) - (_fmin_))*(_delta_))
+
+// Encode in[0..n-1] (type t_t) into out[0..n-1]; the input minimum/maximum are stored in *pfmin/*pfmax.
+// A constant input (max == min) is encoded as all zeros. For n == 0 nothing is read from in or
+// written to out, and *pfmin = *pfmax = 0.
+// NOTE(review): BZMASK32 is defined outside this section - confirm it supports b > 32 for fpquant64e64/fpquant64d64.
+#define FPQUANTE(t_t, in, n, out, b, t_s, pfmin, pfmax) {\
+  if((n) == 0) { *(pfmin) = (t_t)0; *(pfmax) = (t_t)0; } /*empty input: in[0] must not be read*/\
+  else { t_t fmax = (in)[0], fmin = (in)[0], *ip;\
+    for(ip = (in); ip < (in)+(n); ip++) { if(*ip > fmax) fmax = *ip; else if(*ip < fmin) fmin = *ip; }\
+    *(pfmin) = fmin; *(pfmax) = fmax; /*min,max*/\
+    fmax = (fmax == fmin)?(t_t)0.0:BZMASK32(b)/(fmax - fmin); /*fmax now holds the scale factor*/\
+    for(ip = (in); ip < (in)+(n); ip++) *out++ = _FPQUANTE(t_s, ip[0], fmin, fmax);\
+  }\
+}
+
+// Decode in[0..n-1] into out[0..n-1] (type t_t): out[i] = fmin + in[i]*(fmax - fmin)/BZMASK32(b).
+// fmax is overwritten with the step size; nothing is read or written when n == 0.
+#define FPQUANTD(t_t, in, n, out, b, fmin, fmax) { t_t *op;\
+  fmax = (fmax - fmin) / BZMASK32(b); /*distance between two adjacent levels*/\
+  for(op = (out); op < (out)+(n); op++) *op = fmin + (*in++) * fmax;\
+}
+
+  #if defined(FLT16_BUILTIN)
+void fpquant8e16( _Float16 *in, size_t n, uint8_t *out, unsigned b, _Float16 *pfmin, _Float16 *pfmax) { FPQUANTE(_Float16, in, n, out, b, 16, pfmin, pfmax); }
+void fpquant16e16(_Float16 *in, size_t n, uint16_t *out, unsigned b, _Float16 *pfmin, _Float16 *pfmax) { FPQUANTE(_Float16, in, n, out, b, 16, pfmin, pfmax); }
+  #endif
+
+void fpquant8e32( float *in, size_t n, uint8_t *out, unsigned b, float *pfmin, float *pfmax) { FPQUANTE( float, in, n, out, b, 32, pfmin, pfmax); }
+void fpquant16e32( float *in, size_t n, uint16_t *out, unsigned b, float *pfmin, float *pfmax) { FPQUANTE( float, in, n, out, b, 32, pfmin, pfmax); }
+void fpquant32e32( float *in, size_t n, uint32_t *out, unsigned b, float *pfmin, float *pfmax) { FPQUANTE( float, in, n, out, b, 32, pfmin, pfmax); }
+
+void fpquant8e64( double *in, size_t n, uint8_t *out, unsigned b, double *pfmin, double *pfmax) { FPQUANTE( double, in, n, out, b, 64, pfmin, pfmax); }
+void fpquant16e64( double *in, size_t n, uint16_t *out, unsigned b, double *pfmin, double *pfmax) { FPQUANTE( double, in, n, out, b, 64, pfmin, pfmax); }
+void fpquant32e64( double *in, size_t n, uint32_t *out, unsigned b, double *pfmin, double *pfmax) { FPQUANTE( double, in, n, out, b, 64, pfmin, pfmax); }
+void fpquant64e64( double *in, size_t n, uint64_t *out, unsigned b, double *pfmin, double *pfmax) { FPQUANTE( double, in, n, out, b, 64, pfmin, pfmax); }
+
+  #if defined(FLT16_BUILTIN)
+void fpquant8d16( uint8_t *in, size_t n, _Float16 *out, unsigned b, _Float16 fmin, _Float16 fmax) { FPQUANTD(_Float16, in, n, out, b, fmin, fmax); }
+void fpquant16d16(uint16_t *in, size_t n, _Float16 *out, unsigned b, _Float16 fmin, _Float16 fmax) { FPQUANTD(_Float16, in, n, out, b, fmin, fmax); }
+  #endif
+
+void fpquant8d32( uint8_t *in, size_t n, float *out, unsigned b, float fmin, float fmax) { FPQUANTD( float, in, n, out, b, fmin, fmax); }
+void fpquant16d32(uint16_t *in, size_t n, float *out, unsigned b, float fmin, float fmax) { FPQUANTD( float, in, n, out, b, fmin, fmax); }
+void fpquant32d32(uint32_t *in, size_t n, float *out, unsigned b, float fmin, float fmax) { FPQUANTD( float, in, n, out, b, fmin, fmax); }
+
+void fpquant8d64( uint8_t *in, size_t n, double *out, unsigned b, double fmin, double fmax) { FPQUANTD( double, in, n, out, b, fmin, fmax); }
+void fpquant16d64(uint16_t *in, size_t n, double *out, unsigned b, double fmin, double fmax) { FPQUANTD( double, in, n, out, b, fmin, fmax); }
+void fpquant32d64(uint32_t *in, size_t n, double *out, unsigned b, double fmin, double fmax) { FPQUANTD( double, in, n, out, b, fmin, fmax); }
+void fpquant64d64(uint64_t *in, size_t n, double *out, unsigned b, double fmin, double fmax) { FPQUANTD( double, in, n, out, b, fmin, fmax); }
+
 //----------- Lossy floating point conversion: pad the trailing mantissa bits with zero bits according to the relative error e (ex. 0.00001) ----------
   #if defined(FLT16_BUILTIN) // https://clang.llvm.org/docs/LanguageExtensions.html#half-precision-floating-point