diff --git a/fp.c b/fp.c index 4e7c4b5..2db5fc5 100644 --- a/fp.c +++ b/fp.c @@ -21,7 +21,7 @@ - twitter : https://twitter.com/powturbo - email : powturbo [_AT_] gmail [_DOT_] com **/ -// "Floating Point/Integer Compression" +// "Floating Point + Integer Compression. " #ifndef USIZE #pragma warning( disable : 4005) #pragma warning( disable : 4090) @@ -64,6 +64,7 @@ #define bitget16(bw,br,_b_,_x_,_ip_) bitget(bw,br,_b_,_x_) #define bitget32(bw,br,_b_,_x_,_ip_) bitget(bw,br,_b_,_x_) #define bitget64(bw,br,_b_,_x_,_ip_) if((_b_)>45) { unsigned _v; bitget(bw,br,(_b_)-32,_x_); bitdnorm(bw,br,_ip_); bitget(bw,br,32,_v); _x_ = _x_<<32|_v; } else bitget(bw,br,_b_,_x_) + //------------------------------- #define VSIZE 128 @@ -73,21 +74,31 @@ #define NL 18 #define N4 17 // must be > 16 +#define N_0 3 +#define N_1 4 + #define N2 3 #define N3 5 #define USIZE 8 #include "fp.c" +#define N_0 3 +#define N_1 5 + #define N2 6 #define N3 12 #define USIZE 16 #include "fp.c" +#define N_0 4 +#define N_1 6 + #define N2 6 // for seconds time series #define N3 10 #define USIZE 32 #include "fp.c" +#define N_1 7 #define N2 6 // for seconds/milliseconds,... time series #define N3 12 #define N4 20 // must be > 16 @@ -95,20 +106,20 @@ #include "fp.c" #else -// Unlike almost floating point compressors, we are using the better zigzag encoding instead the XOR technique. -//#define ENCX(u,h,_usize_) ((u)^(h)) -//#define DECX(u,h,_usize_) ((u)^(h)) -#define ENCX(_u_ , _h_, _usize_) TEMPLATE2(zigzagenc,_usize_)((_u_)-(_h_)) -#define DECX(_u_ , _h_, _usize_) (TEMPLATE2(zigzagdec,_usize_)(_u_)+(_h_)) + +#define XENC(u,h,_usize_) ((u)^(h)) +#define XDEC(u,h,_usize_) ((u)^(h)) +#define ZENC(_u_ , _h_, _usize_) TEMPLATE2(zigzagenc,_usize_)((_u_)-(_h_)) +#define ZDEC(_u_ , _h_, _usize_) (TEMPLATE2(zigzagdec,_usize_)(_u_)+(_h_)) #define uint_t TEMPLATE3(uint, USIZE, _t) -//---- Last value Predictor +//---- Last value Predictor. (same as p4zenc) size_t TEMPLATE2(fppenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { uint_t _p[VSIZE+32], *ip, *p; unsigned char *op = out; - #define FE(i,_usize_) { TEMPLATE3(uint, _usize_, _t) u = ip[i]; p[i] = ENCX(u, start,_usize_); start = u; } + #define FE(i,_usize_) { TEMPLATE3(uint, _usize_, _t) u = ip[i]; p[i] = ZENC(u, start,_usize_); start = u; } for(ip = in; ip != in + (n&~(VSIZE-1)); ) { for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); } op = TEMPLATE2(P4ENC,USIZE)(_p, VSIZE, op); __builtin_prefetch(ip+512, 0); @@ -124,7 +135,7 @@ size_t TEMPLATE2(fppdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t uint_t *op, _p[VSIZE+32],*p; unsigned char *ip = in; - #define FD(i,_usize_) { TEMPLATE3(uint, USIZE, _t) u = DECX(p[i], start,_usize_); op[i] = u; start = u; } + #define FD(i,_usize_) { TEMPLATE3(uint, USIZE, _t) u = ZDEC(p[i], start,_usize_); op[i] = u; start = u; } for(op = out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0); for(ip = TEMPLATE2(P4DEC,USIZE)(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } } @@ -133,12 +144,12 @@ size_t TEMPLATE2(fppdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t return ip - in; } -// delta of delta -size_t TEMPLATE2(fpddenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { +// zigzag of delta +size_t TEMPLATE2(fpzzenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { uint_t _p[VSIZE+32], *ip, *p, pd = 0; unsigned char *op = out; - #define FE(i,_usize_) { TEMPLATE3(uint, USIZE, _t) u = ip[i]; start = u-start; p[i] = ENCX(start,pd,_usize_); pd = start; start = u; } + #define FE(i,_usize_) { TEMPLATE3(uint, USIZE, _t) u = ip[i]; start = u-start; p[i] = ZENC(start,pd,_usize_); pd = start; start = u; } for(ip = in; ip != in + (n&~(VSIZE-1)); ) { for(p = _p; p != &_p[VSIZE]; p+=4,ip+=4) { FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); } op = TEMPLATE2(P4ENC,USIZE)(_p, VSIZE, op); __builtin_prefetch(ip+512, 0); @@ -150,11 +161,11 @@ size_t TEMPLATE2(fpddenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t return op - out; } -size_t TEMPLATE2(fpdddec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { +size_t TEMPLATE2(fpzzdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { uint_t _p[VSIZE+32],*p, *op, pd=0; unsigned char *ip = in; - #define FD(i,_usize_) { TEMPLATE3(uint, USIZE, _t) u = DECX(p[i],start+pd,_usize_); op[i] = u; pd = u - start; start = u; } + #define FD(i,_usize_) { TEMPLATE3(uint, USIZE, _t) u = ZDEC(p[i],start+pd,_usize_); op[i] = u; pd = u - start; start = u; } for(op = out; op != out+(n&~(VSIZE-1)); ) { __builtin_prefetch(ip+512, 0); for(ip = TEMPLATE2(P4DEC,USIZE)(ip, VSIZE, _p), p = _p; p != &_p[VSIZE]; p+=4,op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); } } @@ -168,12 +179,13 @@ size_t TEMPLATE2(fpdddec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t #define HASH32(_h_,_u_) (((_h_)<<4 ^ (_u_)>>23) & ((1u<>12) & ((1u<> 5) & ((1u< 10 GB/s +size_t TEMPLATE2(fp2dfcmenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { // 2d fcm + uint_t *ip, _p[VSIZE+32], h = 0, *p, htab[1<>2, _n1_);\ + else bitrmv(bw,br,_n2_+2), _x_ = BZHI32(_x_>>2, _n2_);\ +} + size_t TEMPLATE2(fpgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { uint_t *ip; - unsigned ol = 0,ot = 0; + unsigned ol = 0,ot = 0; unsigned char *op = out; bitdef(bw,br); + if(start) { ol = TEMPLATE2(clz,USIZE)(start); ot = TEMPLATE2(ctz,USIZE)(start); } - #define FE(i,_usize_) { TEMPLATE3(uint, _usize_, _t) z = ENCX(ip[i], start,_usize_); start = ip[i];\ + #define FE(i,_usize_) { TEMPLATE3(uint, _usize_, _t) z = XENC(ip[i], start,_usize_); start = ip[i];\ if(likely(!z)) bitput( bw,br, 1, 1);\ - else { unsigned t = TEMPLATE2(ctz,_usize_)(z), l = TEMPLATE2(clz,_usize_)(z); l = l>31?31:l;\ - if(l >= ol && t >= ot) { bitput( bw,br, 2, 2); l = _usize_ - ol - ot; z>>=ot; TEMPLATE2(bitput,_usize_)(bw,br, l, z,op); }\ - else { bitput( bw,br, 2+6+5, (t-1)<<5|l); ol = _usize_ - l - t; z>>= t; TEMPLATE2(bitput,_usize_)(bw,br, ol, z,op); ol = l; ot = t; } \ + else { unsigned t = TEMPLATE2(ctz,_usize_)(z), l = TEMPLATE2(clz,_usize_)(z); /*l = l>31?31:l;*/\ + unsigned s = _usize_ - l - t, os = _usize_ - ol - ot;\ + if(l >= ol && t >= ot && os < 6+5+s) { bitput( bw,br, 2, 2); z>>=ot; TEMPLATE2(bitput,_usize_)(bw,br, os, z,op); }\ + else { bitput( bw,br, 2+6, l<<2); bitput2(bw,br, N_0, N_1, t); z>>= t; bitenorm(bw,br,op);TEMPLATE2(bitput,_usize_)(bw,br, s, z,op); ol = l; ot = t; } \ } bitenorm(bw,br,op);\ } for(ip = in; ip != in + (n&~(4-1)); ip+=4) { __builtin_prefetch(ip+512, 0); FE(0,USIZE); FE(1,USIZE); FE(2,USIZE); FE(3,USIZE); } @@ -250,14 +305,15 @@ size_t TEMPLATE2(fpgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t size_t TEMPLATE2(fpgdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { if(!n) return 0; uint_t *op; - unsigned ol = 0,ot = 0,x; + unsigned ol = 0,ot = 0,x; unsigned char *ip = in; bitdef(bw,br); - + if(start) { ol = TEMPLATE2(clz,USIZE)(start); ot = TEMPLATE2(ctz,USIZE)(start); } + #define FD(i,_usize_) { TEMPLATE3(uint, _usize_, _t) z=0; unsigned _x; bitget(bw,br,1,_x); \ if(likely(!_x)) { bitget(bw,br,1,_x);\ - if(!_x) { bitget(bw,br,11,_x); ot = (_x>>5)+1; ol = _x & 31; } TEMPLATE2(bitget,_usize_)(bw,br,_usize_ - ol - ot,z,ip); z<<=ot;\ - } op[i] = start = DECX(z, start,_usize_); bitdnorm(bw,br,ip);\ + if(!_x) { bitget(bw,br,6,ol); bitget2(bw,br, N_0, N_1, ot); bitdnorm(bw,br,ip);} TEMPLATE2(bitget,_usize_)(bw,br,_usize_ - ol - ot,z,ip); z<<=ot;\ + } op[i] = start = XDEC(z, start,_usize_); bitdnorm(bw,br,ip);\ } for(bitdnorm(bw,br,ip),op = out; op != out+(n&~(4-1)); op+=4) { FD(0,USIZE); FD(1,USIZE); FD(2,USIZE); FD(3,USIZE); __builtin_prefetch(ip+512, 0); } for( ; op != out+n; op++) FD(0,USIZE); @@ -265,11 +321,11 @@ size_t TEMPLATE2(fpgdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t return ip - in; } -// Improved Gorilla style compression with sliding double delta+zigzag encoding+RLE for timestamps in time series. -// Up to 300 times better compression and several times faster +// Improved Gorilla style compression with sliding zigzag of delta + RLE + overflow handling for timestamps in time series. +// more than 300 times better compression and several times faster #define OVERFLOW if(op >= out_) { *out++ = 1<<4; /*bitini(bw,br); bitput(bw,br,4+3,1<<4); bitflush(bw,br,out);*/ memcpy(out,in,n*sizeof(in[0])); return 1+n*sizeof(in[0]); } -size_t TEMPLATE2(bitgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { +size_t TEMPLATE2(bvzzenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { uint_t *ip = in, pd = 0, *pp = in,dd; unsigned char *op = out, *out_ = out+n*sizeof(in[0]); @@ -314,7 +370,7 @@ size_t TEMPLATE2(bitgenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t return op - out; } -size_t TEMPLATE2(bitgdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { if(!n) return 0; +size_t TEMPLATE2(bvzzdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { if(!n) return 0; uint_t *op = out, pd = 0; unsigned char *ip = in; @@ -369,6 +425,100 @@ size_t TEMPLATE2(bitgdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t bitalign(bw,br,ip); return ip - in; } + +// Zigzag delta with bit/io + RLE +size_t TEMPLATE2(bvzenc,USIZE)(uint_t *in, size_t n, unsigned char *out, uint_t start) { + uint_t *ip = in, *pp = in,dd; + unsigned char *op = out, *out_ = out+n*sizeof(in[0]); + + bitdef(bw,br); + #define FE(_pp_, _ip_, _d_, _op_,_usize_) do {\ + uint64_t _r = _ip_ - _pp_;\ + if(_r > NL) { _r -= NL; unsigned _b = (bsr64(_r)+7)>>3; bitput(bw,br,4+3+3,(_b-1)<<(4+3)); bitput64(bw,br,_b<<3, _r, _op_); bitenorm(bw,br,_op_); }\ + else while(_r--) { bitput(bw,br,1,1); bitenorm(bw,br,_op_); }\ + _d_ = TEMPLATE2(zigzagenc,_usize_)(_d_);\ + if(!_d_) bitput(bw,br, 1, 1);\ + else if(_d_ < (1<< (N2-1))) bitput(bw,br, N2+2,_d_<<2|2);\ + else if(_d_ < (1<< (N3-1))) bitput(bw,br, N3+3,_d_<<3|4);\ + else if(_d_ < (1<< (N4-1))) bitput(bw,br, N4+4,_d_<<4|8);\ + else { unsigned _b = (TEMPLATE2(bsr,_usize_)(_d_)+7)>>3; bitput(bw,br,4+3,(_b-1)<<4); TEMPLATE2(bitput,_usize_)(bw,br, _b<<3, _d_,_op_); }\ + bitenorm(bw,br,_op_);\ + } while(0) + + if(n > 4) + for(; ip < in+(n-1-4);) { + dd = ip[0] - start; start = ip[0]; if(dd) goto a; ip++; + dd = ip[0] - start; start = ip[0]; if(dd) goto a; ip++; + dd = ip[0] - start; start = ip[0]; if(dd) goto a; ip++; + dd = ip[0] - start; start = ip[0]; if(dd) goto a; ip++; __builtin_prefetch(ip+256, 0); + continue; + a:; + FE(pp,ip, dd, op,USIZE); + pp = ++ip; OVERFLOW; + } + + for(;ip < in+n;) { + dd = ip[0] - start; start = ip[0]; if(dd) goto b; ip++; + continue; + b:; + FE(pp,ip, dd, op,USIZE); + pp = ++ip; OVERFLOW; + } + if(ip > pp) { + dd = ip[0] - start; start = ip[0]; + FE(pp, ip, dd, op, USIZE); OVERFLOW; + } + bitflush(bw,br,op); + return op - out; +} + +size_t TEMPLATE2(bvzdec,USIZE)(unsigned char *in, size_t n, uint_t *out, uint_t start) { if(!n) return 0; + uint_t *op = out; + unsigned char *ip = in; + + bitdef(bw,br); + for(bitdnorm(bw,br,ip); op < out+n; ) { __builtin_prefetch(ip+384, 0); + #if USIZE == 64 + uint_t dd = bitpeek(bw,br); + #else + uint32_t dd = bitpeek(bw,br); + #endif + if(dd & 1) bitrmv(bw,br, 0+1), dd = 0; + else if(dd & 2) bitrmv(bw,br,N2+2), dd = BZHI32(dd>>2, N2); + else if(dd & 4) bitrmv(bw,br,N3+3), dd = BZHI32(dd>>3, N3); + else if(dd & 8) bitrmv(bw,br,N4+4), dd = BZHI32(dd>>4, N4); + else { + unsigned b; uint_t *_op; uint64_t r; + bitget(bw,br, 4+3, b); + if((b>>=4) <= 1) { + if(b==1) { // No compression, because of overflow + memcpy(out,in+1, n*sizeof(out[0])); + return 1+n*sizeof(out[0]); + } + bitget(bw,br,3,b); bitget64(bw,br,(b+1)<<3,r,ip); bitdnorm(bw,br,ip);//RLE //r+=NL; while(r--) *op++=(start+=pd); + #if defined(__SSE2__) && USIZE == 32 + __m128i sv = _mm_set1_epi32(start); + for(r += NL, _op = op; op != _op+(r&~7);) { + _mm_storeu_si128(op, sv); op += 4; + _mm_storeu_si128(op, sv); op += 4; + } + #else + for(r+=NL, _op = op; op != _op+(r&~7); op += 8) + op[0]=op[1]=op[2]=op[3]=op[4]=op[5]=op[6]=op[7]=start; + #endif + for(; op != _op+r; op++) + *op = start; + continue; + } + TEMPLATE2(bitget,USIZE)(bw,br,(b+1)<<3,dd,ip); + } + dd = TEMPLATE2(zigzagdec,USIZE)(dd); + *op++ = (start += dd); + bitdnorm(bw,br,ip); + } + bitalign(bw,br,ip); + return ip - in; +} #undef USIZE #endif - \ No newline at end of file +