TurboPFor: Floating point
This commit is contained in:
38
lib/fp.c
38
lib/fp.c
@ -126,14 +126,14 @@ static unsigned char dcode[] = {0, 6,10, 14, 18, 20, 22, 24};
|
|||||||
//-------- TurboPFor Zigzag of zigzag for unsorted/sorted integer/floating point array ---------------------------------------
|
//-------- TurboPFor Zigzag of zigzag for unsorted/sorted integer/floating point array ---------------------------------------
|
||||||
size_t T2(p4nzzenc128v,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(p4nzzenc128v,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t _p[VSIZE+32], *ip, *p, pd = 0;
|
uint_t _p[VSIZE+32], *ip, *p, pd = 0;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
|
|
||||||
#define FE(_i_,_usize_) { T3(uint, USIZE, _t) u = ip[_i_]; start = u-start; p[_i_] = ZZAGENC(start,pd,_usize_); pd = start; start = u; }
|
#define FE(_i_,_usize_) { T3(uint, USIZE, _t) u = ip[_i_]; start = u-start; p[_i_] = ZZAGENC(start,pd,_usize_); pd = start; start = u; }
|
||||||
for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
|
for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
|
||||||
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); }
|
||||||
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
||||||
}
|
}
|
||||||
if(n = (in+n)-ip) {
|
if((n = (in+n)-ip) != 0) {
|
||||||
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
||||||
op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e;
|
op = T2(P4ENC,USIZE)(_p, n, op); if(op >= out_) goto e;
|
||||||
}
|
}
|
||||||
@ -153,7 +153,7 @@ size_t T2(p4nzzdec128v,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t s
|
|||||||
for(op = out; op != out+(n&~(VSIZE-1)); ) { PREFETCH(ip+512,0);
|
for(op = out; op != out+(n&~(VSIZE-1)); ) { PREFETCH(ip+512,0);
|
||||||
for(ip = T2(P4DECV,USIZE)(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
for(ip = T2(P4DECV,USIZE)(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
||||||
}
|
}
|
||||||
if(n = (out+n) - op)
|
if((n = (out+n) - op) != 0)
|
||||||
for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
||||||
return ip - in;
|
return ip - in;
|
||||||
#undef FD
|
#undef FD
|
||||||
@ -169,7 +169,7 @@ size_t T2(p4nzzdec128v,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t s
|
|||||||
*/
|
*/
|
||||||
size_t T2(fpxenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(fpxenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t _p[VSIZE+32], *ip, *p;
|
uint_t _p[VSIZE+32], *ip, *p;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
|
|
||||||
#if defined(__AVX2__) && USIZE >= 32
|
#if defined(__AVX2__) && USIZE >= 32
|
||||||
#define _mm256_set1_epi64(a) _mm256_set1_epi64x(a)
|
#define _mm256_set1_epi64(a) _mm256_set1_epi64x(a)
|
||||||
@ -233,7 +233,7 @@ size_t T2(fpxenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start)
|
|||||||
#endif
|
#endif
|
||||||
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
||||||
}
|
}
|
||||||
if(n = (in+n)-ip) { uint_t b = 0;
|
if((n = (in+n)-ip) != 0) { uint_t b = 0;
|
||||||
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
||||||
b = T2(clz,USIZE)(b);
|
b = T2(clz,USIZE)(b);
|
||||||
*op++ = b;
|
*op++ = b;
|
||||||
@ -295,7 +295,7 @@ size_t T2(fpxdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
|
|||||||
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if(n = (out+n) - op) {
|
if((n = (out+n) - op) != 0) {
|
||||||
uint_t b = *ip++;
|
uint_t b = *ip++;
|
||||||
for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p < &_p[n]; p++,op++) FD(0,USIZE);
|
for(ip = T2(P4DEC,USIZE)(ip, n, _p), p = _p; p < &_p[n]; p++,op++) FD(0,USIZE);
|
||||||
}
|
}
|
||||||
@ -312,7 +312,7 @@ size_t T2(fpxdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
|
|||||||
|
|
||||||
size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t htab[1<<HBITS] = {0}, _p[VSIZE+32], *ip, h = 0, *p;
|
uint_t htab[1<<HBITS] = {0}, _p[VSIZE+32], *ip, h = 0, *p;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
|
|
||||||
#if defined(__AVX2__) && USIZE >= 32
|
#if defined(__AVX2__) && USIZE >= 32
|
||||||
#define _mm256_set1_epi64(a) _mm256_set1_epi64x(a)
|
#define _mm256_set1_epi64(a) _mm256_set1_epi64x(a)
|
||||||
@ -354,7 +354,7 @@ size_t T2(fpfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start
|
|||||||
#endif
|
#endif
|
||||||
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
||||||
}
|
}
|
||||||
if(n = (in+n)-ip) { uint_t b = 0;
|
if((n = (in+n)-ip) != 0) { uint_t b = 0;
|
||||||
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
||||||
b = T2(clz,USIZE)(b);
|
b = T2(clz,USIZE)(b);
|
||||||
*op++ = b;
|
*op++ = b;
|
||||||
@ -380,7 +380,7 @@ size_t T2(fpfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start
|
|||||||
unsigned b = *ip++; ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
|
unsigned b = *ip++; ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
|
||||||
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
||||||
}
|
}
|
||||||
if(n = ((uint_t *)out+n) - op) {
|
if((n = ((uint_t *)out+n) - op) != 0) {
|
||||||
unsigned b = *ip++; ip = T2(P4DEC,USIZE)(ip, n, _p);
|
unsigned b = *ip++; ip = T2(P4DEC,USIZE)(ip, n, _p);
|
||||||
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
||||||
}
|
}
|
||||||
@ -391,7 +391,7 @@ size_t T2(fpfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start
|
|||||||
//-------- TurboFloat DFCM: Differential Finite Context Method Predictor ----------------------------------------------------------
|
//-------- TurboFloat DFCM: Differential Finite Context Method Predictor ----------------------------------------------------------
|
||||||
size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0};
|
uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0};
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
|
|
||||||
#define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \
|
#define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \
|
||||||
htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = u;\
|
htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = u;\
|
||||||
@ -403,7 +403,7 @@ size_t T2(fpdfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t star
|
|||||||
for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); }
|
||||||
*op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
*op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
||||||
}
|
}
|
||||||
if(n = (in+n)-ip) { uint_t b;
|
if((n = (in+n)-ip) != 0) { uint_t b;
|
||||||
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
||||||
b = T2(clz,USIZE)(b);
|
b = T2(clz,USIZE)(b);
|
||||||
for(p = _p; p != &_p[n]; p++) TR(0,USIZE);
|
for(p = _p; p != &_p[n]; p++) TR(0,USIZE);
|
||||||
@ -429,7 +429,7 @@ size_t T2(fpdfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t star
|
|||||||
ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
|
ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
|
||||||
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
||||||
}
|
}
|
||||||
if(n = ((uint_t *)out+n) - op) {
|
if((n = ((uint_t *)out+n) - op) != 0) {
|
||||||
uint_t b = *ip++;
|
uint_t b = *ip++;
|
||||||
ip = T2(P4DEC,USIZE)(ip, n, _p);
|
ip = T2(P4DEC,USIZE)(ip, n, _p);
|
||||||
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
||||||
@ -441,7 +441,7 @@ size_t T2(fpdfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t star
|
|||||||
//-------- TurboFloat Double delta DFCM: Differential Finite Context Method Predictor ----------------------------------------------------------
|
//-------- TurboFloat Double delta DFCM: Differential Finite Context Method Predictor ----------------------------------------------------------
|
||||||
size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0},start0=start; start=0;
|
uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0},start0=start; start=0;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
|
|
||||||
#define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \
|
#define FE(_i_,_usize_) { T3(uint, _usize_, _t) u = ip[_i_]; p[_i_] = XORENC(u, (htab[h]+start),_usize_); b |= p[_i_]; \
|
||||||
htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = start0; start0 = u;\
|
htab[h] = start = u - start; h = T2(HASH,_usize_)(h,start); start = start0; start0 = u;\
|
||||||
@ -456,7 +456,7 @@ size_t T2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t sta
|
|||||||
for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4) { TR(0,USIZE); TR(1,USIZE); TR(2,USIZE); TR(3,USIZE); }
|
||||||
*op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
*op++ = b; op = T2(P4ENCV,USIZE)(_p, VSIZE, op); PREFETCH(ip+512,0); if(op >= out_) goto e;
|
||||||
}
|
}
|
||||||
if(n = (in+n)-ip) {
|
if((n = (in+n)-ip) != 0) {
|
||||||
uint_t b = 0;
|
uint_t b = 0;
|
||||||
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,ip++) FE(0,USIZE);
|
||||||
b = T2(clz,USIZE)(b);
|
b = T2(clz,USIZE)(b);
|
||||||
@ -484,7 +484,7 @@ size_t T2(fp2dfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t sta
|
|||||||
ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
|
ip = T2(P4DECV,USIZE)(ip, VSIZE, _p);
|
||||||
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
for(p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); }
|
||||||
}
|
}
|
||||||
if(n = ((uint_t *)out+n) - op) {
|
if((n = ((uint_t *)out+n) - op) != 0) {
|
||||||
uint_t b = *ip++;
|
uint_t b = *ip++;
|
||||||
ip = T2(P4DEC,USIZE)(ip, n, _p);
|
ip = T2(P4DEC,USIZE)(ip, n, _p);
|
||||||
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
for(p = _p; p != &_p[n]; p++,op++) FD(0,USIZE);
|
||||||
@ -510,7 +510,7 @@ size_t T2(fp2dfcmdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t sta
|
|||||||
size_t T2(fpgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(fpgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t *ip = in;
|
uint_t *ip = in;
|
||||||
unsigned ol = 0, ot = 0;
|
unsigned ol = 0, ot = 0;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
bitdef(bw,br);
|
bitdef(bw,br);
|
||||||
if(start) { ol = T2(clz,USIZE)(start); ot = T2(ctz,USIZE)(start); }
|
if(start) { ol = T2(clz,USIZE)(start); ot = T2(ctz,USIZE)(start); }
|
||||||
*op++ = 0;
|
*op++ = 0;
|
||||||
@ -561,7 +561,7 @@ size_t T2(fpgdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
|
|||||||
size_t T2(fphenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
size_t T2(fphenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) {
|
||||||
uint_t *ip = in;
|
uint_t *ip = in;
|
||||||
int ol = USIZE+1,s;
|
int ol = USIZE+1,s;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
*op++ = 0;
|
*op++ = 0;
|
||||||
bitdef(bw,br);
|
bitdef(bw,br);
|
||||||
if(start) ol = T2(clz,USIZE)(start);
|
if(start) ol = T2(clz,USIZE)(start);
|
||||||
@ -636,7 +636,7 @@ size_t T2(fphdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start)
|
|||||||
|
|
||||||
size_t T2(fpc0enc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // simple hash table
|
size_t T2(fpc0enc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // simple hash table
|
||||||
uint_t *ip = in;
|
uint_t *ip = in;
|
||||||
unsigned char *op = out, out_ = out+n*USIZE/8;
|
unsigned char *op = out, *out_ = out+n*USIZE/8;
|
||||||
unsigned htab[1<<FX_BITS] = {0}, ol = USIZE+1;
|
unsigned htab[1<<FX_BITS] = {0}, ol = USIZE+1;
|
||||||
bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start);
|
bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start);
|
||||||
|
|
||||||
@ -673,7 +673,7 @@ size_t T2(fpc0enc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start)
|
|||||||
|
|
||||||
size_t T2(fpcenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // double entry hashtable
|
size_t T2(fpcenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // double entry hashtable
|
||||||
uint_t *ip = in;
|
uint_t *ip = in;
|
||||||
unsigned char *op = out, out_ = out+n*(USIZE/8);
|
unsigned char *op = out, *out_ = out+n*(USIZE/8);
|
||||||
unsigned htab[1<<(FX_BITS+1)] = {0}, ol = USIZE+1;
|
unsigned htab[1<<(FX_BITS+1)] = {0}, ol = USIZE+1;
|
||||||
bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start);
|
bitdef(bw,br); //if(start) ol = T2(clz,USIZE)(start);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user