TurboPFor: Floating point

This commit is contained in:
x
2023-03-23 11:29:37 +01:00
parent d160509253
commit 0c047fae98

View File

@ -126,14 +126,14 @@ static unsigned char dcode[] = {0, 6,10, 14, 18, 20, 22, 24};
//-------- TurboPFor Zigzag of zigzag for unsorted/sorted integer/floating point array --------------------------------------- //-------- TurboPFor Zigzag of zigzag for unsorted/sorted integer/floating point array ---------------------------------------
size_t T2(p4nzzenc128v,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(p4nzzenc128v,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t _p[VSIZE+32], *ip, *p, pd = 0; uint_t _p[VSIZE+32], *ip, *p, pd = 0;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
#define FE(_i_,_usize_) { T3(uint, USIZE, _t) u = ip[_i_]; start = u-start; p[_i_] = ZZAGENC(start,pd,_usize_); pd = start; start = u; } #define FE(_i_,_usize_) { T3(uint, USIZE, _t) u = ip[_i_]; start = u-start; p[_i_] = ZZAGENC(start,pd,_usize_); pd = start; start = u; }
for(ip = in; ip != in + (n&~(VSIZE-1)); ) { for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); }
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
} }
if(n = (in+n)-ip) { if((n = (in+n)-ip) != 0) {
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e; op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e;
} }
@ -153,7 +153,7 @@ size_t T2(p4nzzdec128v,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t s
for(op = out; op != out+(n&~(VSIZE-1)); ) { PREFETCH(ip+512,0); for(op = out; op != out+(n&~(VSIZE-1)); ) { PREFETCH(ip+512,0);
for(ip = T2(P4DECV,USIZE)(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } for(ip = T2(P4DECV,USIZE)(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
} }
if(n = (out+n) - op) if((n = (out+n) - op) != 0)
for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD(0,USIZE); for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
return ip - in; return ip - in;
#undef FD #undef FD
@ -169,7 +169,7 @@ size_t T2(p4nzzdec128v,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t s
*/ */
size_t T2(fpxenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(fpxenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t _p[VSIZE+32], *ip, *p; uint_t _p[VSIZE+32], *ip, *p;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
#if defined(__AVX2__) && USIZE >= 32 #if defined(__AVX2__) && USIZE >= 32
#define _mm256_set1_epi64(a) _mm256_set1_epi64x(a) #define _mm256_set1_epi64(a) _mm256_set1_epi64x(a)
@ -233,7 +233,7 @@ size_t T2(fpxenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start)
#endif #endif
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
} }
if(n = (in+n)-ip) { uint_t b = 0; if((n = (in+n)-ip) != 0) { uint_t b = 0;
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
b = T2(clz,USIZE)(b); b = T2(clz,USIZE)(b);
*op++ = b; *op++ = b;
@ -295,7 +295,7 @@ size_t T2(fpxdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
#endif #endif
} }
if(n = (out+n) - op) { if((n = (out+n) - op) != 0) {
uint_t b = *ip++; uint_t b = *ip++;
for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p < &_p[n]; p++,op++) FD(0,USIZE); for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p < &_p[n]; p++,op++) FD(0,USIZE);
} }
@ -312,7 +312,7 @@ size_t T2(fpxdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t htab[1<<HBITS] = {0}, _p[VSIZE+32], *ip, h = 0, *p; uint_t htab[1<<HBITS] = {0}, _p[VSIZE+32], *ip, h = 0, *p;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
#if defined(__AVX2__) && USIZE >= 32 #if defined(__AVX2__) && USIZE >= 32
#define _mm256_set1_epi64(a) _mm256_set1_epi64x(a) #define _mm256_set1_epi64(a) _mm256_set1_epi64x(a)
@ -354,7 +354,7 @@ size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start
#endif #endif
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
} }
if(n = (in+n)-ip) { uint_t b = 0; if((n = (in+n)-ip) != 0) { uint_t b = 0;
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
b = T2(clz,USIZE)(b); b = T2(clz,USIZE)(b);
*op++ = b; *op++ = b;
@ -380,7 +380,7 @@ size_t T2(fpfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start
unsigned b = *ip++; ip = T2(P4DECV,USIZE)(ip, VSIZE, _p); unsigned b = *ip++; ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
} }
if(n = ((uint_t *)out+n) - op) { if((n = ((uint_t *)out+n) - op) != 0) {
unsigned b = *ip++; ip = T2(P4DEC,USIZE)(ip, n, _p); unsigned b = *ip++; ip = T2(P4DEC,USIZE)(ip, n, _p);
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE); for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
} }
@ -391,7 +391,7 @@ size_t T2(fpfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start
//-------- TurboFloat DFCM: Differential Finite Context Method Predictor ---------------------------------------------------------- //-------- TurboFloat DFCM: Differential Finite Context Method Predictor ----------------------------------------------------------
size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0}; uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0};
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
#define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \ #define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \
htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = u;\ htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = u;\
@ -403,7 +403,7 @@ size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t star
for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); }
*op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; *op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
} }
if(n = (in+n)-ip) { uint_t b; if((n = (in+n)-ip) != 0) { uint_t b;
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
b = T2(clz,USIZE)(b); b = T2(clz,USIZE)(b);
for(p = _p; p != &_p[n]; p++) TR(0,USIZE); for(p = _p; p != &_p[n]; p++) TR(0,USIZE);
@ -429,7 +429,7 @@ size_t T2(fpdfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t star
ip = T2(P4DECV,USIZE)(ip, VSIZE, _p); ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
} }
if(n = ((uint_t *)out+n) - op) { if((n = ((uint_t *)out+n) - op) != 0) {
uint_t b = *ip++; uint_t b = *ip++;
ip = T2(P4DEC,USIZE)(ip, n, _p); ip = T2(P4DEC,USIZE)(ip, n, _p);
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE); for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
@ -441,7 +441,7 @@ size_t T2(fpdfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t star
//-------- TurboFloat Double delta DFCM: Differential Finite Context Method Predictor ---------------------------------------------------------- //-------- TurboFloat Double delta DFCM: Differential Finite Context Method Predictor ----------------------------------------------------------
size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0},start0=start; start=0; uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0},start0=start; start=0;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
#define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \ #define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \
htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = start0; start0 = u;\ htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = start0; start0 = u;\
@ -456,7 +456,7 @@ size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t sta
for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); }
*op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e; *op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
} }
if(n = (in+n)-ip) { if((n = (in+n)-ip) != 0) {
uint_t b = 0; uint_t b = 0;
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE); for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
b = T2(clz,USIZE)(b); b = T2(clz,USIZE)(b);
@ -484,7 +484,7 @@ size_t T2(fp2dfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t sta
ip = T2(P4DECV,USIZE)(ip, VSIZE, _p); ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
} }
if(n = ((uint_t *)out+n) - op) { if((n = ((uint_t *)out+n) - op) != 0) {
uint_t b = *ip++; uint_t b = *ip++;
ip = T2(P4DEC,USIZE)(ip, n, _p); ip = T2(P4DEC,USIZE)(ip, n, _p);
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE); for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
@ -510,7 +510,7 @@ size_t T2(fp2dfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t sta
size_t T2(fpgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(fpgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t *ip = in; uint_t *ip = in;
unsigned ol = 0, ot = 0; unsigned ol = 0, ot = 0;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
bitdef(bw,br); bitdef(bw,br);
if(start) { ol = T2(clz,USIZE)(start); ot = T2(ctz,USIZE)(start); } if(start) { ol = T2(clz,USIZE)(start); ot = T2(ctz,USIZE)(start); }
*op++ = 0; *op++ = 0;
@ -561,7 +561,7 @@ size_t T2(fpgdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
size_t T2(fphenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { size_t T2(fphenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
uint_t *ip = in; uint_t *ip = in;
int ol = USIZE+1,s; int ol = USIZE+1,s;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
*op++ = 0; *op++ = 0;
bitdef(bw,br); bitdef(bw,br);
if(start) ol = T2(clz,USIZE)(start); if(start) ol = T2(clz,USIZE)(start);
@ -636,7 +636,7 @@ size_t T2(fphdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
size_t T2(fpc0enc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // simple hash table size_t T2(fpc0enc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // simple hash table
uint_t *ip = in; uint_t *ip = in;
unsigned char *op = out, out_ = out+n*USIZE/8; unsigned char *op = out, *out_ = out+n*USIZE/8;
unsigned htab[1<<FX_BITS] = {0}, ol = USIZE+1; unsigned htab[1<<FX_BITS] = {0}, ol = USIZE+1;
bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start); bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start);
@ -673,7 +673,7 @@ size_t T2(fpc0enc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start)
size_t T2(fpcenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // double entry hashtable size_t T2(fpcenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // double entry hashtable
uint_t *ip = in; uint_t *ip = in;
unsigned char *op = out, out_ = out+n*(USIZE/8); unsigned char *op = out, *out_ = out+n*(USIZE/8);
unsigned htab[1<<(FX_BITS+1)] = {0}, ol = USIZE+1; unsigned htab[1<<(FX_BITS+1)] = {0}, ol = USIZE+1;
bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start); bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start);