diff --git a/fp.c b/fp.c
index 1d02245..dc9cc31 100644
--- a/fp.c
+++ b/fp.c
@@ -39,7 +39,7 @@
 #define DEC64(u,h) zigzagdec64(u)+(int64_t)h
 
 //---- Last value Predictor
-unsigned char *fppenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t start) {
+unsigned char *fppenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
   uint64_t *ip, _p[VSIZE], *p;
 
   #define FE64(i) { uint64_t u = ip[i]; p[i] = ENC64(u, start); start = u; }
@@ -54,7 +54,7 @@ unsigned char *fppenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t s
   return out;
 }
 
-unsigned char *fppdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t start) {
+unsigned char *fppdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) {
   uint64_t *op, _p[VSIZE+32],*p;
 
   #define FD64(i) { uint64_t u = DEC64(p[i], start); op[i] = u; start = u; }
@@ -67,7 +67,7 @@ unsigned char *fppdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t s
 }
 
 // delta of delta
-unsigned char *fpddenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t start) {
+unsigned char *fpddenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
   uint64_t *ip, _p[VSIZE], *p; int64_t pd=0;
 
   #define FE64(i) { uint64_t u = ip[i],d = u-start; p[i] = ENC64((int64_t)d,pd); pd = d; start = u; }
@@ -82,7 +82,7 @@ unsigned char *fpddenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t
   return out;
 }
 
-unsigned char *fpdddec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t start) {
+unsigned char *fpdddec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) {
   uint64_t *op, h = 0, _p[VSIZE+32],*p, pd=0;
 
   #define FD64(i) { uint64_t u = DEC64(p[i],start+pd); op[i] = u; pd = u - start; start = u; }
@@ -97,7 +97,7 @@ unsigned char *fpdddec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t
 #define HBITS 13 //15
 #define HASH64(_h_,_u_) (((_h_)<<5 ^ (_u_)>>50) & ((1u<>3)), _br_ &= 7
 #define bitalign( _bw_,_br_,_ip_) _ip_ += (_br_+7)>>3
-#define bitput64(bw,br,_b_,_x_,_op_) if(_b_>45) { bitput(bw,br,_b_-32, (_x_)>>32); bitenorm(bw,br,_op_); bitput(bw,br,32,(unsigned)(_x_)); } else bitput(bw,br,_b_,_x_);
-#define bitget64(bw,br,_b_,_x_,_ip_) if(_b_>45) { unsigned _v; bitget(bw,br,_b_-32,_x_); bitdnorm(bw,br,_ip_); bitget(bw,br,32,_v); _x_ = _x_<<32|_v; } else bitget(bw,br,_b_,_x_);
+#define bitput64(bw,br,_b_,_x_,_op_) if((_b_)>45) { bitput(bw,br,(_b_)-32, (_x_)>>32); bitenorm(bw,br,_op_); bitput(bw,br,32,(unsigned)(_x_)); } else bitput(bw,br,_b_,_x_);
+#define bitget64(bw,br,_b_,_x_,_ip_) if((_b_)>45) { unsigned _v; bitget(bw,br,(_b_)-32,_x_); bitdnorm(bw,br,_ip_); bitget(bw,br,32,_v); _x_ = _x_<<32|_v; } else bitget(bw,br,_b_,_x_);
 
 // Fastest Gorilla (see Facebook paper) Floating point/Integer compression implementation using zigzag encoding instead of XOR. Compression 5 GB/s, Decompression: 10 GB/s
-unsigned char *fpgenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t start) {
+unsigned char *fpgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
   uint64_t *ip;
   unsigned ol = 0,ot = 0;
   bitdef(bw,br);
@@ -196,7 +196,7 @@ unsigned char *fpgenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t s
   return out;
 }
 
-unsigned char *fpgdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t start) { if(!n) return in;
+unsigned char *fpgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return in;
   uint64_t *op;
   unsigned ol = 0,ot = 0,x;
   bitdef(bw,br);
@@ -212,10 +212,205 @@ unsigned char *fpgdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t s
 }
 
 // Improved Gorilla style compression with sliding double delta for timestamps in time series.
+// Up to 300 times better compression and several times faster
+//
+// Code words, identified by their low bits (dd = delta of delta):
+//   ...1                  : dd == 0
+//   ..10                  : zigzag(dd) in the following N2 bits
+//   .100                  : zigzag(dd) in the following N3 bits
+//   1000                  : zigzag(dd) in the following N4 bits
+//   0000 + 3 bits k != 0  : zigzag(dd) in the following k+1 bytes
+//   0000000 + 3 bits k    : run count r in the following k+1 bytes; r+NL values repeat the current delta
 #define N2 7 // for seconds time series
 #define N3 9
 #define N4 12
-unsigned char *bitgenc32(uint32_t *in, unsigned n, unsigned char *out, uint32_t start) {
+#define NL 18 // runs of more than NL values with dd == 0 are stored as a count
+
+// Emit the pending run of (_ip_ - _pp_) values whose dd is 0, then the code word for _d_.
+// Works on the caller's bit writer state (bw,br); _d_ is overwritten with its zigzag code.
+#define ENC32(_pp_, _ip_, _d_, _op_) do {\
+  uint64_t _r = (_ip_) - (_pp_);\
+  if(_r > NL) { _r -= NL; unsigned _b = (bsr64(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,_op_); }\
+  else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,_op_); }\
+  _d_ = zigzagenc32(_d_);\
+  if(!_d_) bitput(bw,br, 1, 1);\
+  else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\
+  else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\
+  else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\
+  else { unsigned _b = (bsr32(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); bitput(bw,br, _b<<3, _d_); }\
+  bitenorm(bw,br,_op_);\
+} while(0)
+
+// Encode in[0..n) as delta-of-delta code words, run-length coding stretches where dd == 0.
+// start is the value the first delta is taken against; returns out as left by bitflush.
+unsigned char *bitgenc32(uint32_t *in, size_t n, unsigned char *out, uint32_t start) {
+  uint32_t *ip = in, pd = 0, *pp = in,dd;
+
+  bitdef(bw,br);
+  if(n > 4)
+    for(; ip < in+(n-1-4);) {
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++;
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++;
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++;
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0);
+      continue;
+      a:;
+      ENC32(pp,ip, dd, out);
+      pp = ++ip;
+    }
+
+  for(;ip < in+n;) {
+    start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto b; ip++;
+    continue;
+    b:;
+    ENC32(pp,ip, dd, out);
+    pp = ++ip;
+  }
+  if(ip > pp) { // trailing run: ip == in+n here, so there is no further element to read
+    dd = 0;
+    ENC32(pp, ip, dd, out);
+  }
+  bitflush(bw,br,out);
+  return out;
+}
+
+// Decode n values written by bitgenc32 into out; start must be the value passed to the encoder.
+// Returns in advanced past the consumed bits, rounded up to a whole byte by bitalign.
+unsigned char *bitgdec32(unsigned char *in, size_t n, uint32_t *out, uint32_t start) { if(!n) return in;
+  uint32_t *op = out, pd = 0;
+  bitdef(bw,br);
+  for(bitdnorm(bw,br,in); op < out+n; ) { __builtin_prefetch(in+384, 0);
+    uint32_t dd = bitpeek(bw,br);
+    if(dd & 1) bitrmv(bw,br, 0+1), dd = 0;
+    else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u32(dd>>2, N2);
+    else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u32(dd>>3, N3);
+    else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u32(dd>>4, N4);
+    else {
+      unsigned b; uint32_t *_op; uint64_t r;
+      bitget(bw,br, 4+3, b);
+      if(!b) {
+        bitget(bw,br,3,b); bitget64(bw,br,(b+1)*8,r,in); bitdnorm(bw,br,in);
+        r += NL; if(r > (uint64_t)(out+n-op)) r = (uint64_t)(out+n-op); // never write past out+n on a corrupt stream
+        for(_op = op; op != _op+(r&~(uint64_t)7); op += 8)
+          op[0]=(start+=pd),
+          op[1]=(start+=pd),
+          op[2]=(start+=pd),
+          op[3]=(start+=pd),
+          op[4]=(start+=pd),
+          op[5]=(start+=pd),
+          op[6]=(start+=pd),
+          op[7]=(start+=pd);
+        for(; op != _op+r; op++)
+          *op = (start+=pd);
+        continue;
+      }
+      bitget(bw,br,((b>>4)+1)*8,dd);
+    }
+    pd += zigzagdec32(dd);
+    *op++ = (start += pd);
+    bitdnorm(bw,br,in);
+  }
+  return bitalign(bw,br,in);
+}
+
+#undef N2
+#undef N3
+#undef N4
+#define N2 6 // for seconds/milliseconds,... time series
+#define N3 12
+#define N4 20
+
+// 64-bit counterpart of ENC32. The predictors earlier in this file call a two-argument ENC64, so drop that name first.
+#undef ENC64
+#define ENC64(_pp_, _ip_, _d_, _op_) do {\
+  uint64_t _r = (_ip_) - (_pp_);\
+  if(_r > NL) { _r -= NL; unsigned _b = (bsr64(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,_op_); }\
+  else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,_op_); }\
+  _d_ = zigzagenc64(_d_);\
+  if(!_d_) bitput(bw,br, 1, 1);\
+  else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\
+  else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\
+  else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\
+  else { unsigned _b = (bsr64(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); bitput64(bw,br, _b<<3, _d_,_op_); }\
+  bitenorm(bw,br,_op_);\
+} while(0)
+
+// 64-bit variant of bitgenc32: same code words with the N2/N3/N4 widths defined just above.
+unsigned char *bitgenc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
+  uint64_t *ip = in, pd = 0, *pp = in,dd;
+
+  bitdef(bw,br);
+  if(n > 4)
+    for(; ip < in+(n-1-4);) {
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++;
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++;
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++;
+      start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0);
+      continue;
+      a:;
+      ENC64(pp,ip, dd, out);
+      pp = ++ip;
+    }
+
+  for(;ip < in+n;) {
+    start = ip[0] - start; dd = start-pd; pd = start; start = ip[0]; if(dd) goto b; ip++;
+    continue;
+    b:;
+    ENC64(pp,ip, dd, out);
+    pp = ++ip;
+  }
+  if(ip > pp) { // trailing run: ip == in+n here, so there is no further element to read
+    dd = 0;
+    ENC64(pp, ip, dd, out);
+  }
+  bitflush(bw,br,out);
+  return out;
+}
+
+// 64-bit variant of bitgdec32: decodes n values written by bitgenc64 into out.
+unsigned char *bitgdec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return in;
+  uint64_t *op = out, pd = 0;
+  bitdef(bw,br);
+  for(bitdnorm(bw,br,in); op < out+n; ) { __builtin_prefetch(in+384, 0);
+    uint64_t dd = bitpeek(bw,br);
+    if(dd & 1) bitrmv(bw,br, 0+1), dd = 0;
+    else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u64(dd>>2, N2);
+    else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u64(dd>>3, N3);
+    else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u64(dd>>4, N4);
+    else {
+      unsigned b; uint64_t r,*_op;
+      bitget(bw,br, 4+3, b);
+      if(!b) {
+        bitget(bw,br,3,b); bitget64(bw,br,(b+1)*8,r,in); bitdnorm(bw,br,in);
+        r += NL; if(r > (uint64_t)(out+n-op)) r = (uint64_t)(out+n-op); // never write past out+n on a corrupt stream
+        for(_op = op; op != _op+(r&~(uint64_t)7); op += 8)
+          op[0]=(start+=pd),
+          op[1]=(start+=pd),
+          op[2]=(start+=pd),
+          op[3]=(start+=pd),
+          op[4]=(start+=pd),
+          op[5]=(start+=pd),
+          op[6]=(start+=pd),
+          op[7]=(start+=pd);
+        for(; op != _op+r; op++)
+          *op = (start+=pd);
+        continue;
+      }
+      bitget64(bw,br,((b>>4)+1)*8,dd,in);
+    }
+    pd += zigzagdec64(dd);
+    *op++ = (start += pd);
+    bitdnorm(bw,br,in);
+  }
+  return bitalign(bw,br,in);
+}
+
+#if 0
+// Initial implementation without RLE
+#define N2 7 // for seconds time series
+#define N3 9
+#define N4 12
+unsigned char *bitg0enc32(uint32_t *in, size_t n, unsigned char *out, uint32_t start) {
   uint32_t *ip, pd = 0;
 
   bitdef(bw,br);
@@ -233,7 +428,7 @@ unsigned char *bitgenc32(uint32_t *in, unsigned n, unsigned char *out, uint32_t
   return out;
 }
 
-unsigned char *bitgdec32(unsigned char *in, unsigned n, uint32_t *out, uint32_t start) { if(!n) return in;
+unsigned char *bitg0dec32(unsigned char *in, size_t n, uint32_t *out, uint32_t start) { if(!n) return in;
   uint32_t *op, pd = 0;
 
   bitdef(bw,br);
@@ -253,11 +448,11 @@ unsigned char *bitgdec32(unsigned char *in, unsigned n, uint32_t *out, uint32_t
 #define N2 6 // for seconds/milliseconds,... time series
 #define N3 12
 #define N4 20
-unsigned char *bitgenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t start) {
+unsigned char *bitg0enc64(uint64_t *in, size_t n, unsigned char *out, uint64_t start) {
   uint64_t *ip, pd = 0;
 
   bitdef(bw,br);
-  #define FE64(i) { uint64_t dd; start = ip[i] - start; dd = start-pd; pd = start; dd = zigzagenc64(dd); start = ip[i];\
+  #define FE64(i) { uint64_t dd; start = (int64_t)ip[i] - (int64_t)start; dd = (int64_t)start-(int64_t)pd; pd = start; dd = zigzagenc64(dd); start = ip[i];\
       if(!dd) bitput(bw,br, 1, 1);\
       else if(dd < (1<< (N2-1))) bitput(bw,br, N2+2,dd<<2|2);\
       else if(dd < (1<< (N3-1))) bitput(bw,br, N3+3,dd<<3|4);\
@@ -271,7 +466,7 @@ unsigned char *bitgenc64(uint64_t *in, unsigned n, unsigned char *out, uint64_t
   return out;
 }
 
-unsigned char *bitgdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t start) { if(!n) return in;
+unsigned char *bitg0dec64(unsigned char *in, size_t n, uint64_t *out, uint64_t start) { if(!n) return in;
   uint64_t *op, pd = 0;
 
   bitdef(bw,br);
@@ -280,10 +475,11 @@ unsigned char *bitgdec64(unsigned char *in, unsigned n, uint64_t *out, uint64_t
       else if(dd & 2) bitrmv(bw,br,N2+2), dd = _bzhi_u64(dd>>2, N2);\
       else if(dd & 4) bitrmv(bw,br,N3+3), dd = _bzhi_u64(dd>>3, N3);\
       else if(dd & 8) bitrmv(bw,br,N4+4), dd = _bzhi_u64(dd>>4, N4);\
-      else { unsigned _b; bitget(bw,br,4+3,_b); bitget64(bw,br,((_b>>4)+1)*8,dd,in); }\
-      pd += zigzagdec64(dd); op[i] = (start += pd); bitdnorm(bw,br,in);\
+      else { unsigned _b; bitget(bw,br,4+3,_b); _b = ((_b>>4)+1)*8; bitget64(bw,br,_b,dd,in); }\
+      pd += zigzagdec64(dd); start += pd; op[i] = start; bitdnorm(bw,br,in);\
     }
   for(bitdnorm(bw,br,in),op = out; op != out+(n&~(4-1)); op+=4) { FD64(0); FD64(1); FD64(2); FD64(3); __builtin_prefetch(in+512, 0); }
   for(; op != out+n; op++) FD64(0);
   return bitalign(bw,br,in);
 }
+#endif