From b8377faa4a0448fcaf71767a3e53669282531d18 Mon Sep 17 00:00:00 2001 From: x Date: Sun, 30 Apr 2023 22:44:18 +0200 Subject: [PATCH] TurboPFor: Floating point --- lib/fp.c | 81 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/lib/fp.c b/lib/fp.c index a96da65..e742527 100644 --- a/lib/fp.c +++ b/lib/fp.c @@ -311,8 +311,9 @@ size_t T2(fpxdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) #define HASH8( _h_,_u_) (((_h_)<<2 ^ (_u_)>> 5) & ((1u<= 32 #define _mm256_set1_epi64(a) _mm256_set1_epi64x(a) @@ -322,7 +323,8 @@ size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start __m128i sv = T2(_mm_set1_epi, USIZE)(start); #endif - for(ip = in; ip != in + (n&~(VSIZE-1)); ) { uint_t b = 0; + for(ip = in; ip != in + (n&~(VSIZE-1)); ) { + uint_t b = 0; #define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, htab[h],_usize_); b |= p[_i_]; htab[h] = u; h = T2(HASH,_usize_)(h,u); } for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); } *op++ = b = T2(clz,USIZE)(b); @@ -354,24 +356,24 @@ size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start #endif op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; } - if((n = (in+n)-ip) != 0) { uint_t b = 0; - for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); - b = T2(clz,USIZE)(b); + if((m = (in+n)-ip) != 0) { + uint_t b = 0; + for(p = _p; p != &_p[m]; p++,ip++) FE(0,USIZE); + b = b?T2(clz,USIZE)(b):USIZE; *op++ = b; - for(p = _p; p != &_p[n]; p++) TR(0,USIZE); - op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e; - } - if(op >= out_) { - e:op = out; *op++ = 0xff; memcpy(op, in, n*(USIZE/8)); op+=n*(USIZE/8); + for(p = _p; p != &_p[m]; p++) TR(0,USIZE); + op = T2(P4ENC,USIZE)(_p, m, op); if(op >= out_) goto e; } + if(op >= out_) { e:op = out; *op++ = 0xff; memcpy(op, in, n*(USIZE/8)); op += n*(USIZE/8); } return op - out; #undef FE } size_t T2(fpfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { - uint_t *op, htab[1<>b;\ u = XORDEC(u, htab[h], _usize_); op[_i_] = u; htab[h] = u; h = T2(HASH,_usize_)(h,u);\ @@ -390,36 +392,39 @@ size_t T2(fpfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start //-------- TurboFloat DFCM: Differential Finite Context Method Predictor ---------------------------------------------------------- size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { - uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<= out_) goto e; + *op++ = b; + op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; } - if((n = (in+n)-ip) != 0) { uint_t b; - for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); - b = T2(clz,USIZE)(b); - for(p = _p; p != &_p[n]; p++) TR(0,USIZE); - *op++ = b; op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e; - } - if(op >= out_) { - e:op = out; *op++ = 0xff; memcpy(op, in, n*(USIZE/8)); op+=n*(USIZE/8); + if((m = (in+n)-ip) != 0) { + uint_t b = 0; + for(p = _p; p != &_p[m]; p++,ip++) FE(0,USIZE); + b = b?T2(clz,USIZE)(b):USIZE; + for(p = _p; p != &_p[m]; p++) TR(0,USIZE); + *op++ = b; op = T2(P4ENC,USIZE)(_p, m, op); if(op >= out_) goto e; } + if(op >= out_) { e:op = out; *op++ = 0xff; memcpy(op, in, n*(USIZE/8)); op += n*(USIZE/8); } return op - out; #undef FE } size_t T2(fpdfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { - uint_t _p[VSIZE+32], *op, h = 0, *p, htab[1<>b; u = XORDEC(u, (htab[h]+start),_usize_); \ op[_i_] = u; htab[h] = start = u-start; h = T2(HASH,_usize_)(h,start); start = u;\ @@ -440,7 +445,8 @@ size_t T2(fpdfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t star //-------- TurboFloat Double delta DFCM: Differential Finite Context Method Predictor ---------------------------------------------------------- size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { - uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<= out_) goto e; } - if((n = (in+n)-ip) != 0) { + if((m = (in+n)-ip) != 0) { uint_t b = 0; - for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); + for(p = _p; p != &_p[m]; p++,ip++) FE(0,USIZE); b = T2(clz,USIZE)(b); - for(p = _p; p != &_p[n]; p++) TR(0,USIZE); - *op++ = b; op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e; - } - if(op >= out_) { - e:op = out; *op++ = 0xff; memcpy(op, in, n*(USIZE/8)); op+=n*(USIZE/8); + for(p = _p; p != &_p[m]; p++) TR(0,USIZE); + *op++ = b; op = T2(P4ENC,USIZE)(_p, m, op); if(op >= out_) goto e; } + if(op >= out_) { e:op = out; *op++ = 0xff; memcpy(op, in, n*(USIZE/8)); op+=n*(USIZE/8); } return op - out; #undef FE } size_t T2(fp2dfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { - uint_t _p[VSIZE+32], *op, h = 0, *p, htab[1<>b; u = XORDEC(u, (htab[h]+start),_usize_);\