TurboPFor: Floating Point encode/decode

This commit is contained in:
x
2018-02-14 09:56:59 +01:00
parent 18567dfc2f
commit 58423fab3c

256
fp.c
View File

@ -39,117 +39,125 @@
#define DEC64(u,h) zigzagdec64(u)+(int64_t)h #define DEC64(u,h) zigzagdec64(u)+(int64_t)h
//---- Last value Predictor //---- Last value Predictor
unsigned char *fppenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t fppenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip, _p[VSIZE], *p; uint64_t _p[VSIZE+32], *ip, *p;
unsigned char *op = out;
#define FE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, start); start = u; } #define FE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, start); start = u; }
for(ip = in; ip != in + (n&~(VSIZE-1)); ) { for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); } for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); }
out = p4enc64(_p, VSIZE, out); __builtin_prefetch(ip+512, 0); op = p4enc64(_p, VSIZE, op); __builtin_prefetch(ip+512, 0);
} }
if(n = ((uint64_t *)in+n)-ip) { if(n = (in+n)-ip) {
for(p = _p; p != &_p[n]; p++,ip++) FE64(0); for(p = _p; p != &_p[n]; p++,ip++) FE64(0);
out = p4enc64(_p, n, out); op = p4enc64(_p, n, op);
} }
return out; return op - out;
} }
unsigned char *fppdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { size_t fppdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) {
uint64_t *op, _p[VSIZE+32],*p; uint64_t *op, _p[VSIZE+32],*p;
unsigned char *ip = in;
#define FD64(i) { uint64_t u = DEC64(p[i], start); op[i] = u; start = u; } #define FD64(i) { uint64_t u = DEC64(p[i], start); op[i] = u; start = u; }
for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(in+512, 0); for(op = out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0);
for(in = p4dec64(in, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); } for(ip = p4dec64(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); }
} }
if(n = ((uint64_t *)out+n) - op) if(n = (out+n) - op)
for(in = p4dec64(in, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0); for(ip = p4dec64(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0);
return in; return ip - in;
} }
// delta of delta // delta of delta
unsigned char *fpddenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t fpddenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip, _p[VSIZE], *p; int64_t pd=0; uint64_t _p[VSIZE+32], *ip, *p, pd = 0;
unsigned char *op = out;
#define FE64(i) { uint64_t u = ip[i],d = u-start; p[i] = ENC64((int64_t)d,pd); pd = d; start = u; } #define FE64(i) { uint64_t u = ip[i]; start = u-start; p[i] = ENC64(start,pd); pd = start; start = u; }
for(ip = (uint64_t *)in; ip != in + (n&~(VSIZE-1)); ) { for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); } for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); }
out = p4enc64(_p, VSIZE, out); __builtin_prefetch(ip+512, 0); op = p4enc64(_p, VSIZE, op); __builtin_prefetch(ip+512, 0);
} }
if(n = ((uint64_t *)in+n)-ip) { if(n = (in+n)-ip) {
for(p = _p; p != &_p[n]; p++,ip++) FE64(0); for(p = _p; p != &_p[n]; p++,ip++) FE64(0);
out = p4enc64(_p, n, out); op = p4enc64(_p, n, op);
} }
return out; return op - out;
} }
unsigned char *fpdddec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { size_t fpdddec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) {
uint64_t *op, h = 0, _p[VSIZE+32],*p, pd=0; uint64_t _p[VSIZE+32],*p, *op, pd=0;
unsigned char *ip = in;
#define FD64(i) { uint64_t u = DEC64(p[i],start+pd); op[i] = u; pd = u - start; start = u; } #define FD64(i) { uint64_t u = DEC64(p[i],start+pd); op[i] = u; pd = u - start; start = u; }
for(op = out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(in+512, 0); for(op = out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0);
for(in = p4dec64(in, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); } for(ip = p4dec64(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); }
} }
if(n = ((uint64_t *)out+n) - op) if(n = (out+n) - op)
for(in = p4dec64(in, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0); for(ip = p4dec64(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0);
return in; return ip - in;
} }
#define HBITS 13 //15 #define HBITS 13 //15
#define HASH64(_h_,_u_) (((_h_)<<5 ^ (_u_)>>50) & ((1u<<HBITS)-1)) #define HASH64(_h_,_u_) (((_h_)<<5 ^ (_u_)>>50) & ((1u<<HBITS)-1))
//---- FCM: Finite Context Method Predictor //---- FCM: Finite Context Method Predictor
unsigned char *fpfcmenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t fpfcmenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip, htab[1<<HBITS] = {0}, h = 0, _p[VSIZE], *p; uint64_t htab[1<<HBITS] = {0}, _p[VSIZE+32], *ip, h = 0, *p;
unsigned char *op = out;
#define FE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, htab[h]); htab[h] = u; h = HASH64(h,u); } #define FE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, htab[h]); htab[h] = u; h = HASH64(h,u); }
for(ip = (uint64_t *)in; ip != in + (n&~(VSIZE-1)); ) { for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); } for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE64(0); FE64(1); FE64(2); FE64(3); }
out = p4enc64(_p, VSIZE, out); __builtin_prefetch(ip+512, 0); op = p4enc64(_p, VSIZE, op); __builtin_prefetch(ip+512, 0);
} }
if(n = ((uint64_t *)in+n)-ip) { if(n = (in+n)-ip) {
for(p = _p; p != &_p[n]; p++,ip++) FE64(0); for(p = _p; p != &_p[n]; p++,ip++) FE64(0);
out = p4enc64(_p, n, out); op = p4enc64(_p, n, op);
} }
return out; return op - out;
} }
unsigned char *fpfcmdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { size_t fpfcmdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) {
uint64_t *op, htab[1<<HBITS] = {0}, h = 0, _p[VSIZE+32],*p; uint64_t *op, htab[1<<HBITS] = {0}, h = 0, _p[VSIZE+32],*p;
unsigned char *ip = in;
#define FD64(i) { uint64_t u = DEC64(p[i], htab[h]); op[i] = u; htab[h] = u; h = HASH64(h,u); } #define FD64(i) { uint64_t u = DEC64(p[i], htab[h]); op[i] = u; htab[h] = u; h = HASH64(h,u); }
for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(in+512, 0); for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0);
for(in = p4dec64(in, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); } for(ip = p4dec64(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); }
} }
if(n = ((uint64_t *)out+n) - op) if(n = ((uint64_t *)out+n) - op)
for(in = p4dec64(in, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0); for(ip = p4dec64(ip, n, _p), p = _p; p != &_p[n]; p++,op++) FD64(0);
return in; return ip - in;
} }
// DFCM: Differential Finite Context Method Predictor // DFCM: Differential Finite Context Method Predictor
unsigned char *fpdfcmenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t fpdfcmenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0}; uint64_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<<HBITS] = {0};
unsigned char *op = out;
#define DE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, (htab[h]+start)); htab[h] = start = u - start; h = HASH64(h,start); start = u; } #define DE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, (htab[h]+start)); htab[h] = start = u - start; h = HASH64(h,start); start = u; }
for(ip = (uint64_t *)in; ip != in + (n&~(VSIZE-1)); ) { for(ip = in; ip != in + (n&~(VSIZE-1)); ) {
for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { DE64(0); DE64(1); DE64(2); DE64(3); } for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { DE64(0); DE64(1); DE64(2); DE64(3); }
out = p4enc64(_p, VSIZE, out); __builtin_prefetch(ip+512, 0); op = p4enc64(_p, VSIZE, op); __builtin_prefetch(ip+512, 0);
} }
if(n = ((uint64_t *)in+n)-ip) { if(n = (in+n)-ip) {
for(p = _p; p != &_p[n]; p++,ip++) DE64(0); for(p = _p; p != &_p[n]; p++,ip++) DE64(0);
out = p4enc64(_p, n, out); op = p4enc64(_p, n, op);
} }
return out; return op - out;
} }
unsigned char *fpdfcmdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { size_t fpdfcmdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) {
uint64_t _p[VSIZE+32], *op, h = 0, *p, htab[1<<HBITS] = {0}; uint64_t _p[VSIZE+32], *op, h = 0, *p, htab[1<<HBITS] = {0};
unsigned char *ip = in;
#define DD64(i) { uint64_t u = DEC64(p[i], (htab[h]+start)); op[i] = u; htab[h] = start = u-start; h = HASH64(h,start); start = u; } #define DD64(i) { uint64_t u = DEC64(p[i], (htab[h]+start)); op[i] = u; htab[h] = start = u-start; h = HASH64(h,start); start = u; }
for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(in+512, 0); for(op = (uint64_t*)out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0);
for(in = p4dec64(in, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { DD64(0); DD64(1); DD64(2); DD64(3); } for(ip = p4dec64(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { DD64(0); DD64(1); DD64(2); DD64(3); }
} }
if(n = ((uint64_t *)out+n) - op) if(n = ((uint64_t *)out+n) - op)
for(in = p4dec64(in, n, _p), p = _p; p != &_p[n]; p++,op++) DD64(0); for(ip = p4dec64(ip, n, _p), p = _p; p != &_p[n]; p++,op++) DD64(0);
return in; return ip - in;
} }
// ------------------ bitio compression --------------------------- // ------------------ bitio compression ---------------------------
@ -173,42 +181,45 @@ unsigned char *fpdfcmdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t
#define bitget( _bw_,_br_,_nb_,_x_) _x_ = bitpeek64(_bw_, _br_, _nb_), bitrmv(_bw_, _br_, _nb_) #define bitget( _bw_,_br_,_nb_,_x_) _x_ = bitpeek64(_bw_, _br_, _nb_), bitrmv(_bw_, _br_, _nb_)
#define bitdnorm( _bw_,_br_,_ip_) _bw_ = ctou64(_ip_ += (_br_>>3)), _br_ &= 7 #define bitdnorm( _bw_,_br_,_ip_) _bw_ = ctou64(_ip_ += (_br_>>3)), _br_ &= 7
#define bitalign( _bw_,_br_,_ip_) _ip_ += (_br_+7)>>3 #define bitalign( _bw_,_br_,_ip_) (_ip_ += (_br_+7)>>3)
#define bitput64(bw,br,_b_,_x_,_op_) if((_b_)>45) { bitput(bw,br,(_b_)-32, (_x_)>>32); bitenorm(bw,br,_op_); bitput(bw,br,32,(unsigned)(_x_)); } else bitput(bw,br,_b_,_x_); #define bitput64(bw,br,_b_,_x_,_op_) if((_b_)>45) { bitput(bw,br,(_b_)-32, (_x_)>>32); bitenorm(bw,br,_op_); bitput(bw,br,32,(unsigned)(_x_)); } else bitput(bw,br,_b_,_x_);
#define bitget64(bw,br,_b_,_x_,_ip_) if((_b_)>45) { unsigned _v; bitget(bw,br,(_b_)-32,_x_); bitdnorm(bw,br,_ip_); bitget(bw,br,32,_v); _x_ = _x_<<32|_v; } else bitget(bw,br,_b_,_x_); #define bitget64(bw,br,_b_,_x_,_ip_) if((_b_)>45) { unsigned _v; bitget(bw,br,(_b_)-32,_x_); bitdnorm(bw,br,_ip_); bitget(bw,br,32,_v); _x_ = _x_<<32|_v; } else bitget(bw,br,_b_,_x_);
// Fastest Gorilla (see Facebook paper) Floating point/Integer compression implementation using zigzag encoding instead of XOR. Compression 5 GB/s, Decompression: 10 GB/s // Fastest Gorilla (see Facebook paper) Floating point/Integer compression implementation using zigzag encoding instead of XOR. Compression 5 GB/s, Decompression: 10 GB/s
unsigned char *fpgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t fpgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip; uint64_t *ip;
unsigned ol = 0,ot = 0; unsigned ol = 0,ot = 0;
unsigned char *op = out;
bitdef(bw,br); bitdef(bw,br);
#define FE64(i) { uint64_t z = ENC64(ip[i], start); start = ip[i];\ #define FE64(i) { uint64_t z = ENC64(ip[i], start); start = ip[i];\
if(likely(!z)) bitput( bw,br, 1, 1);\ if(likely(!z)) bitput( bw,br, 1, 1);\
else { unsigned t = ctz64(z), l = clz64(z); l = l>31?31:l;\ else { unsigned t = ctz64(z), l = clz64(z); l = l>31?31:l;\
if(l >= ol && t >= ot) { bitput( bw,br, 2, 2); l = 64 - ol - ot; z>>=ot; bitput64(bw,br, l, z,out); }\ if(l >= ol && t >= ot) { bitput( bw,br, 2, 2); l = 64 - ol - ot; z>>=ot; bitput64(bw,br, l, z,op); }\
else { bitput( bw,br, 2+6+5, (t-1)<<5|l); ol = 64 - l - t; z>>= t; bitput64(bw,br, ol, z,out); ol = l; ot = t; } \ else { bitput( bw,br, 2+6+5, (t-1)<<5|l); ol = 64 - l - t; z>>= t; bitput64(bw,br, ol, z,op); ol = l; ot = t; } \
} bitenorm(bw,br,out);\ } bitenorm(bw,br,op);\
} }
for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE64(0); FE64(1); FE64(2); FE64(3); } for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE64(0); FE64(1); FE64(2); FE64(3); }
for( ; ip != in + n ; ip++ ) FE64(0); for( ; ip != in + n ; ip++ ) FE64(0);
bitflush(bw,br,out); bitflush(bw,br,op);
return out; return op - out;
} }
unsigned char *fpgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return in; size_t fpgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return 0;
uint64_t *op; uint64_t *op;
unsigned ol = 0,ot = 0,x; unsigned ol = 0,ot = 0,x;
unsigned char *ip = in;
bitdef(bw,br); bitdef(bw,br);
#define FD64(i) { uint64_t z=0; unsigned _x; bitget(bw,br,1,_x); \ #define FD64(i) { uint64_t z=0; unsigned _x; bitget(bw,br,1,_x); \
if(likely(!_x)) { bitget(bw,br,1,_x);\ if(likely(!_x)) { bitget(bw,br,1,_x);\
if(!_x) { bitget(bw,br,11,_x); ot = (_x>>5)+1; ol = _x & 31; } bitget64(bw,br,64 - ol - ot,z,in); z<<=ot;\ if(!_x) { bitget(bw,br,11,_x); ot = (_x>>5)+1; ol = _x & 31; } bitget64(bw,br,64 - ol - ot,z,ip); z<<=ot;\
} op[i] = start = DEC64(z, start); bitdnorm(bw,br,in);\ } op[i] = start = DEC64(z, start); bitdnorm(bw,br,ip);\
} }
for(bitdnorm(bw,br,in),op = out; op != out+(n&~(4-1)); op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); __builtin_prefetch(in+512, 0); } for(bitdnorm(bw,br,ip),op = out; op != out+(n&~(4-1)); op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); __builtin_prefetch(ip+512, 0); }
for( ; op != out+n; op++) FD64(0); for( ; op != out+n; op++) FD64(0);
return bitalign(bw,br,in); bitalign(bw,br,ip);
return ip - in;
} }
// Improved Gorilla style compression with sliding double delta for timestamps in time series. // Improved Gorilla style compression with sliding double delta for timestamps in time series.
@ -220,19 +231,20 @@ unsigned char *fpgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t sta
#define ENC32(_pp_, _ip_, _d_, _op_) do {\ #define ENC32(_pp_, _ip_, _d_, _op_) do {\
size_t _r = _ip_ - _pp_;\ size_t _r = _ip_ - _pp_;\
if(_r > NL) { _r -= NL; unsigned _b = (bsr32(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,out); }\ if(_r > NL) { _r -= NL; unsigned _b = (bsr32(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,_op_); }\
else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,out); }\ else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,_op_); }\
_d_ = zigzagenc32(_d_);\ _d_ = zigzagenc32(_d_);\
if(!_d_) bitput(bw,br, 1, 1);\ if(!_d_) bitput(bw,br, 1, 1);\
else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\ else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\
else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\ else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\
else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\ else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\
else { unsigned _b = (bsr32(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); bitput(bw,br, _b<<3, _d_); }\ else { unsigned _b = (bsr32(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); bitput(bw,br, _b<<3, _d_); }\
bitenorm(bw,br,out);\ bitenorm(bw,br,_op_);\
} while(0) } while(0)
unsigned char *bitgenc32(uint32_t *in, size_t n, unsigned char *out, uint32_t start) { size_t bitgenc32(uint32_t *in, size_t n, unsigned char *out, uint32_t start) {
uint32_t *ip = in, pd = 0, *pp = in,dd; uint32_t *ip = in, pd = 0, *pp = in,dd;
unsigned char *op = out;
bitdef(bw,br); bitdef(bw,br);
if(n > 4) if(n > 4)
@ -243,7 +255,7 @@ unsigned char *bitgenc32(uint32_t *in, size_t n, unsigned char *out, uint32_t st
start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0); start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0);
continue; continue;
a:; a:;
ENC32(pp,ip, dd, out); ENC32(pp,ip, dd, op);
pp = ++ip; pp = ++ip;
} }
@ -251,21 +263,23 @@ unsigned char *bitgenc32(uint32_t *in, size_t n, unsigned char *out, uint32_t st
start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto b; ip++; start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto b; ip++;
continue; continue;
b:; b:;
ENC32(pp,ip, dd, out); ENC32(pp,ip, dd, op);
pp = ++ip; pp = ++ip;
} }
if(ip > pp) { if(ip > pp) {
start = ip[0] - start; dd = start-pd; start = ip[0] - start; dd = start-pd;
ENC32(pp, ip, dd, out); ENC32(pp, ip, dd, op);
} }
bitflush(bw,br,out); bitflush(bw,br,op);
return out; return op - out;
} }
unsigned char *bitgdec32(unsigned char *in, size_t n, uint32_t *out, uint32_t start) { if(!n) return in; size_t bitgdec32(unsigned char *in, size_t n, uint32_t *out, uint32_t start) { if(!n) return 0;
uint32_t *op = out, pd = 0; uint32_t *op = out, pd = 0;
unsigned char *ip = in;
bitdef(bw,br); bitdef(bw,br);
for(bitdnorm(bw,br,in); op < out+n; ) { __builtin_prefetch(in+384, 0);
for(bitdnorm(bw,br,ip); op < out+n; ) { __builtin_prefetch(ip+384, 0);
uint32_t dd = bitpeek(bw,br); uint32_t dd = bitpeek(bw,br);
if(dd & 1) bitrmv(bw,br, 0+1), dd = 0; if(dd & 1) bitrmv(bw,br, 0+1), dd = 0;
else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u32(dd>>2, N2); else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u32(dd>>2, N2);
@ -275,7 +289,7 @@ unsigned char *bitgdec32(unsigned char *in, size_t n, uint32_t *out, uint32_t st
unsigned b,*_op; size_t r; unsigned b,*_op; size_t r;
bitget(bw,br, 4+3, b); bitget(bw,br, 4+3, b);
if(!b) { if(!b) {
bitget(bw,br,3,b); bitget64(bw,br,(b+1)*8,r,in); bitdnorm(bw,br,in); bitget(bw,br,3,b); bitget64(bw,br,(b+1)*8,r,ip); bitdnorm(bw,br,ip);
for(r+=NL, _op = op; op != _op+(r&~7); op += 8) for(r+=NL, _op = op; op != _op+(r&~7); op += 8)
op[0]=(start+=pd), op[0]=(start+=pd),
op[1]=(start+=pd), op[1]=(start+=pd),
@ -293,9 +307,10 @@ unsigned char *bitgdec32(unsigned char *in, size_t n, uint32_t *out, uint32_t st
} }
pd += zigzagdec32(dd); pd += zigzagdec32(dd);
*op++ = (start += pd); *op++ = (start += pd);
bitdnorm(bw,br,in); bitdnorm(bw,br,ip);
} }
return bitalign(bw,br,in); bitalign(bw,br,ip);
return ip - in;
} }
#define N2 6 // for seconds/milliseconds,... time series #define N2 6 // for seconds/milliseconds,... time series
@ -304,19 +319,20 @@ unsigned char *bitgdec32(unsigned char *in, size_t n, uint32_t *out, uint32_t st
#define ENC64(_pp_, _ip_, _d_, _op_) do {\ #define ENC64(_pp_, _ip_, _d_, _op_) do {\
uint64_t _r = _ip_ - _pp_;\ uint64_t _r = _ip_ - _pp_;\
if(_r > NL) { _r -= NL; unsigned _b = (bsr64(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,out); }\ if(_r > NL) { _r -= NL; unsigned _b = (bsr64(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,_op_); }\
else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,out); }\ else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,_op_); }\
_d_ = zigzagenc64(_d_);\ _d_ = zigzagenc64(_d_);\
if(!_d_) bitput(bw,br, 1, 1);\ if(!_d_) bitput(bw,br, 1, 1);\
else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\ else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\
else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\ else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\
else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\ else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\
else { unsigned _b = (bsr64(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); bitput64(bw,br, _b<<3, _d_,_op_); }\ else { unsigned _b = (bsr64(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); bitput64(bw,br, _b<<3, _d_,_op_); }\
bitenorm(bw,br,out);\ bitenorm(bw,br,_op_);\
} while(0) } while(0)
unsigned char *bitgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t bitgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip = in, pd = 0, *pp = in,dd; uint64_t *ip = in, pd = 0, *pp = in,dd;
unsigned char *op = out;
bitdef(bw,br); bitdef(bw,br);
if(n > 4) if(n > 4)
@ -327,7 +343,7 @@ unsigned char *bitgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t st
start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0); start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0);
continue; continue;
a:; a:;
ENC64(pp,ip, dd, out); ENC64(pp,ip, dd, op);
pp = ++ip; pp = ++ip;
} }
@ -335,21 +351,23 @@ unsigned char *bitgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t st
start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto b; ip++; start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto b; ip++;
continue; continue;
b:; b:;
ENC64(pp,ip, dd, out); ENC64(pp,ip, dd, op);
pp = ++ip; pp = ++ip;
} }
if(ip > pp) { if(ip > pp) {
start = ip[0] - start; dd = start-pd; start = ip[0] - start; dd = start-pd;
ENC64(pp, ip, dd, out); ENC64(pp, ip, dd, op);
} }
bitflush(bw,br,out); bitflush(bw,br,op);
return out; return op - out;
} }
unsigned char *bitgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return in; size_t bitgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return 0;
uint64_t *op = out, pd = 0; uint64_t *op = out, pd = 0;
unsigned char *ip = in;
bitdef(bw,br); bitdef(bw,br);
for(bitdnorm(bw,br,in); op < out+n; ) { __builtin_prefetch(in+384, 0); for(bitdnorm(bw,br,ip); op < out+n; ) { __builtin_prefetch(ip+384, 0);
uint64_t dd = bitpeek(bw,br); uint64_t dd = bitpeek(bw,br);
if(dd & 1) bitrmv(bw,br, 0+1), dd = 0; if(dd & 1) bitrmv(bw,br, 0+1), dd = 0;
else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u64(dd>>2, N2); else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u64(dd>>2, N2);
@ -359,7 +377,7 @@ unsigned char *bitgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t st
unsigned b; uint64_t r,*_op; unsigned b; uint64_t r,*_op;
bitget(bw,br, 4+3, b); bitget(bw,br, 4+3, b);
if(!b) { if(!b) {
bitget(bw,br,3,b); bitget64(bw,br,(b+1)*8,r,in); bitdnorm(bw,br,in); bitget(bw,br,3,b); bitget64(bw,br,(b+1)*8,r,ip); bitdnorm(bw,br,ip);
//r+=NL; while(r--) *op++=(start+=pd); //r+=NL; while(r--) *op++=(start+=pd);
for(r+=NL, _op = op; op != _op+(r&~7); op += 8) for(r+=NL, _op = op; op != _op+(r&~7); op += 8)
op[0]=(start+=pd), op[0]=(start+=pd),
@ -374,13 +392,14 @@ unsigned char *bitgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t st
*op = (start+=pd); *op = (start+=pd);
continue; continue;
} }
bitget64(bw,br,((b>>4)+1)*8,dd,in); bitget64(bw,br,((b>>4)+1)*8,dd,ip);
} }
pd += zigzagdec64(dd); pd += zigzagdec64(dd);
*op++ = (start += pd); *op++ = (start += pd);
bitdnorm(bw,br,in); bitdnorm(bw,br,ip);
} }
return bitalign(bw,br,in); bitalign(bw,br,ip);
return ip - in;
} }
#if 0 #if 0
@ -388,26 +407,29 @@ unsigned char *bitgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t st
#define N2 7 // for seconds time series #define N2 7 // for seconds time series
#define N3 9 #define N3 9
#define N4 12 #define N4 12
unsigned char *bitg0enc32(uint32_t *in, size_t n, unsigned char *out, uint32_t start) { size_t bitg0enc32(uint32_t *in, size_t n, unsigned char *out, uint32_t start) {
uint32_t *ip, pd = 0; uint32_t *ip, pd = 0;
unsigned char *op = out;
bitdef(bw,br); bitdef(bw,br);
#define FE32(i) { uint32_t dd; start = ip[i] - start; dd = start-pd; pd = start; dd = zigzagenc32(dd); start = ip[i];\ #define FE32(i) { uint32_t dd; start = ip[i] - start; dd = start-pd; pd = start; dd = zigzagenc32(dd); start = ip[i];\
if(!dd) bitput(bw,br, 1, 1);\ if(!dd) bitput(bw,br, 1, 1);\
else if(dd < (1<< (N2-1))) bitput(bw,br, N2+2,dd<<2|2);\ else if(dd < (1<< (N2-1))) bitput(bw,br, N2+2,dd<<2|2);\
else if(dd < (1<< (N3-1))) bitput(bw,br, N3+3,dd<<3|4);\ else if(dd < (1<< (N3-1))) bitput(bw,br, N3+3,dd<<3|4);\
else if(dd < (1<< (N4-1))) bitput(bw,br, N4+4,dd<<4|8);\ else if(dd < (1<< (N4-1))) bitput(bw,br, N4+4,dd<<4|8);\
else { unsigned _b = (bsr32(dd)+7)>>3; bitput(bw,br,4+2,(_b-1)<<4); bitput(bw,br, _b<<3, dd); }\ else { unsigned _b = (bsr32(dd)+7)>>3; bitput(bw,br,4+2,(_b-1)<<4); bitput(bw,br, _b<<3, dd); }\
bitenorm(bw,br,out);\ bitenorm(bw,br,op);\
} }
for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE32(0); FE32(1); FE32(2); FE32(3); } for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE32(0); FE32(1); FE32(2); FE32(3); }
for( ; ip != in + n ; ip++ ) FE32(0); for( ; ip != in + n ; ip++ ) FE32(0);
bitflush(bw,br,out); bitflush(bw,br,op);
return out; return op - out;
} }
unsigned char *bitg0dec32(unsigned char *in, size_t n, uint32_t *out, uint32_t start) { if(!n) return in; size_t bitg0dec32(unsigned char *in, size_t n, uint32_t *out, uint32_t start) { if(!n) return 0;
uint32_t *op, pd = 0; uint32_t *op, pd = 0;
unsigned char *ip = in;
bitdef(bw,br); bitdef(bw,br);
#define FD32(i) { uint32_t dd = bitpeek(bw,br);\ #define FD32(i) { uint32_t dd = bitpeek(bw,br);\
@ -416,18 +438,20 @@ unsigned char *bitg0dec32(unsigned char *in, size_t n, uint32_t *out, uint32_t s
else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u32(dd>>3, N3);\ else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u32(dd>>3, N3);\
else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u32(dd>>4, N4);\ else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u32(dd>>4, N4);\
else { unsigned _b; bitget(bw,br,4+2,_b); bitget(bw,br,((_b>>4)+1)*8,dd); }\ else { unsigned _b; bitget(bw,br,4+2,_b); bitget(bw,br,((_b>>4)+1)*8,dd); }\
pd += zigzagdec32(dd); op[i] = (start += pd); bitdnorm(bw,br,in);\ pd += zigzagdec32(dd); op[i] = (start += pd); bitdnorm(bw,br,ip);\
} }
for(bitdnorm(bw,br,in),op = out; op != out+(n&~(4-1)); op+=4) { FD32(0); FD32(1); FD32(2); FD32(3); __builtin_prefetch(in+512, 0); } for(bitdnorm(bw,br,ip),op = out; op != out+(n&~(4-1)); op+=4) { FD32(0); FD32(1); FD32(2); FD32(3); __builtin_prefetch(ip+512, 0); }
for(; op != out+n; op++) FD32(0); for(; op != out+n; op++) FD32(0);
return bitalign(bw,br,in); bitalign(bw,br,ip);
return ip - in;
} }
#define N2 6 // for seconds/milliseconds,... time series #define N2 6 // for seconds/milliseconds,... time series
#define N3 12 #define N3 12
#define N4 20 #define N4 20
unsigned char *bitg0enc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) { size_t bitg0enc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
uint64_t *ip, pd = 0; uint64_t *ip, pd = 0;
unsigned char *op = out;
bitdef(bw,br); bitdef(bw,br);
#define FE64(i) { uint64_t dd; start = (int64_t)ip[i] - (int64_t)start; dd = (int64_t)start-(int64_t)pd; pd = start; dd = zigzagenc64(dd); start = ip[i];\ #define FE64(i) { uint64_t dd; start = (int64_t)ip[i] - (int64_t)start; dd = (int64_t)start-(int64_t)pd; pd = start; dd = zigzagenc64(dd); start = ip[i];\
@ -435,17 +459,18 @@ unsigned char *bitg0enc64(uint64_t *in, size_t n, unsigned char *out, uint64_t s
else if(dd < (1<< (N2-1))) bitput(bw,br, N2+2,dd<<2|2);\ else if(dd < (1<< (N2-1))) bitput(bw,br, N2+2,dd<<2|2);\
else if(dd < (1<< (N3-1))) bitput(bw,br, N3+3,dd<<3|4);\ else if(dd < (1<< (N3-1))) bitput(bw,br, N3+3,dd<<3|4);\
else if(dd < (1<< (N4-1))) bitput(bw,br, N4+4,dd<<4|8);\ else if(dd < (1<< (N4-1))) bitput(bw,br, N4+4,dd<<4|8);\
else { unsigned _b = (bsr64(dd)+7)>>3; bitput(bw,br,3+4,(_b-1)<<4); bitput64(bw,br, _b<<3, dd, out); }\ else { unsigned _b = (bsr64(dd)+7)>>3; bitput(bw,br,3+4,(_b-1)<<4); bitput64(bw,br, _b<<3, dd, op); }\
bitenorm(bw,br,out);\ bitenorm(bw,br,op);\
} }
for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE64(0); FE64(1); FE64(2); FE64(3); } for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE64(0); FE64(1); FE64(2); FE64(3); }
for( ; ip != in + n ; ip++ ) FE64(0); for( ; ip != in + n ; ip++ ) FE64(0);
bitflush(bw,br,out); bitflush(bw,br,op);
return out; return op - out;
} }
unsigned char *bitg0dec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return in; size_t bitg0dec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return 0;
uint64_t *op, pd = 0; uint64_t *op, pd = 0;
unsigned char *ip = in;
bitdef(bw,br); bitdef(bw,br);
#define FD64(i) { uint64_t dd = bitpeek(bw,br);\ #define FD64(i) { uint64_t dd = bitpeek(bw,br);\
@ -453,12 +478,13 @@ unsigned char *bitg0dec64(unsigned char *in, size_t n, uint64_t *out, uint64_t s
else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u64(dd>>2, N2);\ else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u64(dd>>2, N2);\
else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u64(dd>>3, N3);\ else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u64(dd>>3, N3);\
else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u64(dd>>4, N4);\ else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u64(dd>>4, N4);\
else { unsigned _b; bitget(bw,br,4+3,_b); _b = ((_b>>4)+1)*8; bitget64(bw,br,_b,dd,in); }\ else { unsigned _b; bitget(bw,br,4+3,_b); _b = ((_b>>4)+1)*8; bitget64(bw,br,_b,dd,ip); }\
pd += zigzagdec64(dd); start += pd; op[i] = start; bitdnorm(bw,br,in);\ pd += zigzagdec64(dd); start += pd; op[i] = start; bitdnorm(bw,br,ip);\
} }
for(bitdnorm(bw,br,in),op = out; op != out+(n&~(4-1)); op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); __builtin_prefetch(in+512, 0); } for(bitdnorm(bw,br,ip),op = out; op != out+(n&~(4-1)); op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); __builtin_prefetch(ip+512, 0); }
for(; op != out+n; op++) FD64(0); for(; op != out+n; op++) FD64(0);
return bitalign(bw,br,in); bitalign(bw,br,ip);
return ip - in;
} }
#endif #endif