.
This commit is contained in:
13
README.md
13
README.md
@ -26,6 +26,7 @@ TurboPFor: Fastest Integer Compression [ implementation
|
||||
- :new: **now up to 25% faster**
|
||||
<p>
|
||||
+ **Simple family**
|
||||
- :sparkles: **Novel** **"Variable Simple"** (incl. **RLE**) faster and more efficient than simple16, simple-8b
|
||||
@ -71,7 +72,7 @@ CPU: Sandy bridge i7-2600k at 4.2GHz, gcc 5.1, ubuntu 15.04, single thread.
|
||||
| 99.910.930| 24.98| 7.99| 2524.50|1943.41|[SIMDPack FPF](#FastPFor)|
|
||||
| 99.910.930| 24.98| 7.99| 1883.21|1898.11|**TurboPack**|
|
||||
| 99.910.930| 24.98| 7.99| 1877.25| 935.83|**TurboForDA**|
|
||||
|102.074.663| 25.52| 8.17| 1621.64|1694.64|**TurboVbyte**|
|
||||
|102.074.663| 25.52| 8.17| 1993.95|1827.04|**TurboVbyte**|
|
||||
|102.074.663| 25.52|8.17|1214.12|1688.95|[MaskedVByte](#MaskedVByte)|
|
||||
|102.074.663| 25.52| 8.17| 1178.72| 949.59|[Vbyte FPF](#FastPFor)|
|
||||
|103.035.930| 25.76| 8.24| 1480.47|1746.51|[libfor](#libfor)|
|
||||
@ -90,16 +91,16 @@ CPU: Skylake i7-6700 w/ only 3.7GHz
|
||||
| 63392801| 15.85| 5.07| 387.30| 243.62|**TurboPForDA**|
|
||||
| 65359916| 16.34| 5.23| 7.58| 609.12|OptPFD|
|
||||
| 73477088| 18.37| 5.88| 101.68| 621.37|Simple16|
|
||||
| 78514276| 19.63| 6.28|256.83|676.45|**VSimple**|
|
||||
| 95915096| 23.98| 7.67| 211.79|954.62|Simple-8b|
|
||||
| 78514276| 19.63| 6.28|258.31|691.48|**VSimple**|
|
||||
| 95915096| 23.98| 7.67| 211.79|957.62|Simple-8b|
|
||||
| 98546814| 24.64| 7.88| 70.85|**2349.71**|[QMX](#QMX)|
|
||||
| 99910930| 24.98| 7.99|**3537.57**|**3081.79**|**TurboPackV**|
|
||||
| 99910930| 24.98| 7.99| 3099.52|3071.77|SIMDPack FPF|
|
||||
| 99910930| 24.98| 7.99| 2050.47|2402.54|**TurboPack**|
|
||||
| 99910930| 24.98| 7.99| 2095.79|2495.22|**TurboPack**|
|
||||
| 99910930| 24.98| 7.99| 2049.85|2364.52|**TurboFor**|
|
||||
| 99910930| 24.98| 7.99| 2049.70|1124.12|**TurboForDA**|
|
||||
|102074663| 25.52| 8.17| 1354.42|1745.69|MaskedVByte|
|
||||
|102074663| 25.52| 8.17| 1660.76|1626.67|**TurboVbyte**|
|
||||
|102074663| 25.52| 8.17| 1825.64|1844.34|**TurboVbyte**|
|
||||
|102074663| 25.52| 8.17| 1249.77|1051.85|Vbyte FPF|
|
||||
|112500000| 28.12| 9.00| 466.94|3003.70|VarintG8IU|
|
||||
|128125000| 32.03| 10.25| 1109.67|1271.32|[StreamVbyte FPF](#FastPFor)|
|
||||
@ -310,4 +311,4 @@ header files to use with documentation:<br />
|
||||
- [On Inverted Index Compression for Search Engine Efficiency](http://www.dcs.gla.ac.uk/~craigm/publications/catena14compression.pdf)
|
||||
- [Google's Group Varint Encoding](http://static.googleusercontent.com/media/research.google.com/de//people/jeff/WSDM09-keynote.pdf)
|
||||
|
||||
Last update: 27 MAR 2016
|
||||
Last update: 08 APR 2016
|
||||
|
||||
24
bitunpack.c
24
bitunpack.c
@ -35,17 +35,17 @@
|
||||
#define DSTI(__op)
|
||||
#define BPI(__w, __x, __parm) __w
|
||||
#include __FILE__
|
||||
unsigned char *bitunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return ip; }
|
||||
unsigned char *bitunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return ip; }
|
||||
unsigned char *bitunpack64( const unsigned char *__restrict in, unsigned n, uint64_t *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK64(in, n, b, out, 0); return ip; }
|
||||
unsigned char *bitunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return (unsigned char *)ip; }
|
||||
unsigned char *bitunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return (unsigned char *)ip; }
|
||||
unsigned char *bitunpack64( const unsigned char *__restrict in, unsigned n, uint64_t *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK64(in, n, b, out, 0); return (unsigned char *)ip; }
|
||||
#undef BPI
|
||||
#undef DSTI
|
||||
//-----------------------------------------------------------------------------------------------------------------
|
||||
#define DSTI(__op)
|
||||
#define BPI(__w, __x, __parm) (__parm += (__w) + 1)
|
||||
#include __FILE__
|
||||
unsigned char *bitd1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitd1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitd1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
unsigned char *bitd1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
#undef BPI
|
||||
#undef DSTI
|
||||
|
||||
@ -53,8 +53,8 @@ unsigned char *bitd1unpack16(const unsigned char *__restrict in, unsigned n, uns
|
||||
#define DSTI(__op)
|
||||
#define BPI(__w, __x, __parm) (__parm += (__w))
|
||||
#include __FILE__
|
||||
unsigned char *bitdunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitdunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitdunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
unsigned char *bitdunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
#undef BPI
|
||||
#undef DSTI
|
||||
|
||||
@ -63,7 +63,7 @@ unsigned char *bitdunpack16( const unsigned char *__restrict in, unsigned n, uns
|
||||
#define DSTI(__op)
|
||||
#define BPI(__w, __x, __parm) (__parm += zigzagdec32(__w))
|
||||
#include __FILE__
|
||||
unsigned char *bitzunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitzunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
//unsigned char *bitzunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
#undef BPI
|
||||
#undef DSTI
|
||||
@ -73,8 +73,8 @@ unsigned char *bitzunpack32( const unsigned char *__restrict in, unsigned n, uns
|
||||
#define BPI(__w, __x, __parm) (__parm + (__w))
|
||||
#include __FILE__
|
||||
|
||||
unsigned char *bitfunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitfunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitfunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
unsigned char *bitfunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
#undef BPI
|
||||
#undef DSTI
|
||||
|
||||
@ -82,8 +82,8 @@ unsigned char *bitfunpack16( const unsigned char *__restrict in, unsigned n, uns
|
||||
#define DSTI(__op) start += 32
|
||||
#define BPI(__w, __x, __parm) (__parm + (__w)+__x+1)
|
||||
#include __FILE__
|
||||
unsigned char *bitf1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitf1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
|
||||
unsigned char *bitf1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
unsigned char *bitf1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
|
||||
#undef BPI
|
||||
#undef DSTI
|
||||
|
||||
|
||||
14
bitunpackv.c
14
bitunpackv.c
@ -78,7 +78,7 @@ unsigned char *bitunpackv32( const unsigned char *__restrict in, unsigned n, uns
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
@ -116,7 +116,7 @@ unsigned char *_bitunpackv32( const unsigned char *__restrict in, unsigned n, un
|
||||
const unsigned char *ip = in+PAD8(n*b); unsigned m;
|
||||
__m128i sv;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
@ -134,7 +134,7 @@ unsigned char *bitzunpackv32( const unsigned char *__restrict in, unsigned n, un
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv = _mm_set1_epi32(start);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef BITUNPACK0
|
||||
@ -149,7 +149,7 @@ unsigned char *bitdunpackv32( const unsigned char *__restrict in, unsigned n, un
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv = _mm_set1_epi32(start);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
@ -171,7 +171,7 @@ unsigned char *_bitdunpackv32( const unsigned char *__restrict in, unsigned n, u
|
||||
const unsigned char *ip = in+PAD8(n*b); unsigned m;
|
||||
__m128i sv = _mm_set1_epi32(start);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
@ -188,7 +188,7 @@ unsigned char *bitd1unpackv32( const unsigned char *__restrict in, unsigned n, u
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(4,3,2,1);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
@ -209,7 +209,7 @@ unsigned char *_bitd1unpackv32( const unsigned char *__restrict in, unsigned n,
|
||||
const unsigned char *ip = in+PAD8(n*b); unsigned m;
|
||||
__m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(4,3,2,1);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
return (unsigned char *)ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
|
||||
136
bitutil.c
136
bitutil.c
@ -1,5 +1,5 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2013-2015
|
||||
Copyright (C) powturbo 2013-2016
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -33,19 +33,22 @@
|
||||
_x = (*_p)-__start-__inc; __start = *_p++; __act;\
|
||||
_x = (*_p)-__start-__inc; __start = *_p++; __act;\
|
||||
}\
|
||||
while(_p < __p+__n) { \
|
||||
while(_p != __p+__n) { \
|
||||
_x = *_p-__start-__inc; __start = *_p++; __act;\
|
||||
}\
|
||||
}
|
||||
|
||||
#define BITUNDELTA(__p, __n, __start, __inc) { typeof(__p[0]) *_p;\
|
||||
#define BITUNDELTA(__p, __n, __start, __inc) {\
|
||||
typeof(__p[0]) *_p;\
|
||||
for(_p = __p; _p != __p+(__n&~(4-1)); ) {\
|
||||
*_p = (__start += (*_p) + __inc); _p++;\
|
||||
*_p = (__start += (*_p) + __inc); _p++;\
|
||||
*_p = (__start += (*_p) + __inc); _p++;\
|
||||
*_p = (__start += (*_p) + __inc); _p++;\
|
||||
}\
|
||||
while(_p < __p+__n) { *_p = (__start += (*_p) + __inc); _p++; }\
|
||||
while(_p != __p+__n) {\
|
||||
*_p = (__start += (*_p) + __inc); _p++;\
|
||||
}\
|
||||
}
|
||||
|
||||
#define BITMINMAX(__p,__n, __mi, __mx) {\
|
||||
@ -56,7 +59,7 @@
|
||||
if(*_p < __mi) __mi = *_p; if(*_p > __mx) __mx = *_p; _p++; \
|
||||
if(*_p < __mi) __mi = *_p; if(*_p > __mx) __mx = *_p; _p++; \
|
||||
}\
|
||||
while(_p < __p+__n) { \
|
||||
while(_p != __p+__n) { \
|
||||
if(*_p < __mi) __mi = *_p; if(*_p > __mx) __mx = *_p; _p++; \
|
||||
}\
|
||||
}
|
||||
@ -65,29 +68,36 @@ unsigned bitdelta32(unsigned *in, unsigned n, unsigned *out, unsigned start, uns
|
||||
#ifdef __SSE2__
|
||||
unsigned *ip,b,*op = out;
|
||||
__m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(inc), dv;
|
||||
for(ip = in; ip != in+(n&~(4-1)); ip += 4) {
|
||||
for(ip = in; ip != in+(n&~(4-1)); ip += 4,op += 4) {
|
||||
__m128i iv = _mm_loadu_si128((__m128i *)ip);
|
||||
bv = _mm_or_si128(bv, dv = _mm_sub_epi32(DELTA128_32(iv,sv),cv));
|
||||
sv = iv;
|
||||
_mm_storeu_si128((__m128i *)op, dv);
|
||||
op += 4;
|
||||
}
|
||||
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
HOR128_32(bv, b);
|
||||
while(ip < in+n) { unsigned x = *ip-start-inc; start = *ip++; b |= x; *op++ = x; }
|
||||
while(ip != in+n) {
|
||||
unsigned x = *ip-start-inc;
|
||||
start = *ip++;
|
||||
b |= x;
|
||||
*op++ = x;
|
||||
}
|
||||
#else
|
||||
typeof(in[0]) b = 0,*op = out; BITDELTA(in, n, inc, start, b |= _x;*op++ = _x);
|
||||
typeof(in[0]) b = 0,*op = out;
|
||||
BITDELTA(in, n, inc, start, b |= _x;*op++ = _x);
|
||||
#endif
|
||||
return bsr32(b);
|
||||
}
|
||||
|
||||
unsigned bitdelta64(uint64_t *in, unsigned n, uint64_t *out, uint64_t start, unsigned inc) {
|
||||
typeof(in[0]) b = 0,*op = out; BITDELTA(in, n, inc, start, b |= _x; *op++ = _x);
|
||||
typeof(in[0]) b = 0,*op = out;
|
||||
BITDELTA(in, n, inc, start, b |= _x; *op++ = _x);
|
||||
return bsr64(b);
|
||||
}
|
||||
|
||||
unsigned bit32(unsigned *in, unsigned n) {
|
||||
typeof(in[0]) b; BITSIZE32(in, n, b);
|
||||
typeof(in[0]) b;
|
||||
BITSIZE32(in, n, b);
|
||||
return b;
|
||||
}
|
||||
|
||||
@ -119,13 +129,14 @@ unsigned bitd32(unsigned *in, unsigned n, unsigned start) {
|
||||
|
||||
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
HOR128_32(bv, b);
|
||||
while(ip < in+n) {
|
||||
while(ip != in+n) {
|
||||
unsigned x = *ip-start;
|
||||
start = *ip++;
|
||||
b |= x;
|
||||
}
|
||||
#else
|
||||
typeof(in[0]) b = 0; BITDELTA(in,n, 0, start, b |= _x);
|
||||
typeof(in[0]) b = 0;
|
||||
BITDELTA(in,n, 0, start, b |= _x);
|
||||
#endif
|
||||
return bsr32(b);
|
||||
}
|
||||
@ -141,13 +152,14 @@ unsigned bitd132(unsigned *in, unsigned n, unsigned start) {
|
||||
|
||||
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
HOR128_32(bv, b);
|
||||
while(ip < in+n) {
|
||||
while(ip != in+n) {
|
||||
unsigned x = *ip-start-1;
|
||||
start = *ip++;
|
||||
b |= x;
|
||||
}
|
||||
#else
|
||||
typeof(in[0]) b = 0; BITDELTA(in, n, 1, start, b |= _x);
|
||||
typeof(in[0]) b = 0;
|
||||
BITDELTA(in, n, 1, start, b |= _x);
|
||||
#endif
|
||||
return bsr32(b);
|
||||
}
|
||||
@ -159,14 +171,13 @@ void bitund132(unsigned *p, unsigned n, unsigned x) {
|
||||
#ifdef __SSE2__
|
||||
__m128i sv = _mm_set1_epi32(x), cv = _mm_set_epi32(4,3,2,1);
|
||||
unsigned *ip;
|
||||
for(ip = p; ip != p+(n&~(4-1)); ) {
|
||||
for(ip = p; ip != p+(n&~(4-1)); ip += 4) {
|
||||
__m128i v = _mm_loadu_si128((__m128i *)ip);
|
||||
SCANI128_32(v, sv, cv);
|
||||
_mm_storeu_si128((__m128i *)ip, sv);
|
||||
ip += 4;
|
||||
}
|
||||
x = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
while(ip < p+n) {
|
||||
while(ip != p+n) {
|
||||
*ip = (x += (*ip) + 1);
|
||||
ip++;
|
||||
}
|
||||
@ -188,18 +199,21 @@ void bitundx64(uint64_t *p, unsigned n, uint64_t x, unsigned inc) { BITUNDELTA(p
|
||||
_x = ((int)(*_p)-(int)__start); _x = (_x << 1) ^ (_x >> (sizeof(_x)*8-1)); __start = *_p++; __act;\
|
||||
}\
|
||||
while(_p != __p+__n) { \
|
||||
_x = ((int)(*_p)-(int)__start); _x = (_x << 1) ^ (_x >> (sizeof(_x)*8-1)); __start = *_p++; __act;\
|
||||
_x = ((int)(*_p)-(int)__start); _x = (_x << 1) ^ (_x >> (sizeof(_x)*8-1)); __start = *_p++; __act;\
|
||||
}\
|
||||
}
|
||||
|
||||
#define BITUNZIGZAG(__p, __n, __start) { typeof(__p[0]) *_p, _z;\
|
||||
#define BITUNZIGZAG(__p, __n, __start) {\
|
||||
typeof(__p[0]) *_p, _z;\
|
||||
for(_p = __p; _p != __p+(__n&~(4-1)); ) {\
|
||||
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
|
||||
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
|
||||
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
|
||||
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
|
||||
}\
|
||||
while(_p != __p+__n) { _z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++; }\
|
||||
while(_p != __p+__n) {\
|
||||
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
|
||||
}\
|
||||
}
|
||||
|
||||
unsigned bitz32(unsigned *in, unsigned n, unsigned start) {
|
||||
@ -216,10 +230,15 @@ unsigned bitz32(unsigned *in, unsigned n, unsigned start) {
|
||||
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
HOR128_32(bv, b);
|
||||
while(ip != in+n) {
|
||||
int x = ((int)(*ip)-(int)start); x = (x << 1) ^ (x >> 31); start = *ip++; b |= x;
|
||||
int x = ((int)(*ip)-(int)start);
|
||||
x = (x << 1) ^ (x >> 31);
|
||||
start = *ip++;
|
||||
b |= x;
|
||||
}
|
||||
#else
|
||||
typeof(in[0]) b = 0,*op = out; int _x; BITZIGZAG(in, n, start, b |= (unsigned)_x);
|
||||
typeof(in[0]) b = 0,*op = out;
|
||||
int _x;
|
||||
BITZIGZAG(in, n, start, b |= (unsigned)_x);
|
||||
#endif
|
||||
return bsr32(b);
|
||||
}
|
||||
@ -228,22 +247,27 @@ unsigned bitzigzag32(unsigned *in, unsigned n, unsigned *out, unsigned start) {
|
||||
#ifdef __SSE2__
|
||||
unsigned *ip,b,*op = out;
|
||||
__m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start), dv;
|
||||
for(ip = in; ip != in+(n&~(4-1)); ip += 4) {
|
||||
for(ip = in; ip != in+(n&~(4-1)); ip += 4,op += 4) {
|
||||
__m128i iv = _mm_loadu_si128((__m128i *)ip);
|
||||
dv = DELTA128_32(iv,sv);
|
||||
sv = iv;
|
||||
dv = ZIGZAG128_32(dv);
|
||||
bv = _mm_or_si128(bv, dv);
|
||||
_mm_storeu_si128((__m128i *)op, dv);
|
||||
op += 4;
|
||||
}
|
||||
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
HOR128_32(bv, b);
|
||||
while(ip != in+n) {
|
||||
int x = ((int)(*ip)-(int)start); x = (x << 1) ^ (x >> 31); start = *ip++; b |= x; *op++ = x;
|
||||
int x = ((int)(*ip)-(int)start);
|
||||
x = (x << 1) ^ (x >> 31);
|
||||
start = *ip++;
|
||||
b |= x;
|
||||
*op++ = x;
|
||||
}
|
||||
#else
|
||||
typeof(in[0]) b = 0,*op = out; int _x; BITZIGZAG(in, n, start, b |= (unsigned)_x; *op++ = _x);
|
||||
typeof(in[0]) b = 0, *op = out;
|
||||
int _x;
|
||||
BITZIGZAG(in, n, start, b |= (unsigned)_x; *op++ = _x);
|
||||
#endif
|
||||
return bsr32(b);
|
||||
}
|
||||
@ -252,61 +276,81 @@ void bitunzigzag32(unsigned *p, unsigned n, unsigned start) {
|
||||
#ifdef __SSE2__
|
||||
__m128i sv = _mm_set1_epi32(start); //, c1 = _mm_set1_epi32(1), cz = _mm_setzero_si128();
|
||||
unsigned *ip;
|
||||
for(ip = p; ip != p+(n&~(4-1)); ) {
|
||||
for(ip = p; ip != p+(n&~(4-1)); ip += 4) {
|
||||
__m128i iv = _mm_loadu_si128((__m128i *)ip);
|
||||
iv = UNZIGZAG128_32(iv);
|
||||
SCAN128_32(iv, sv);
|
||||
_mm_storeu_si128((__m128i *)ip, sv);
|
||||
ip += 4;
|
||||
}
|
||||
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
|
||||
while(ip != p+n) {
|
||||
unsigned z = *ip; *ip = (start += (z >> 1 ^ -(z & 1))); ip++;
|
||||
unsigned z = *ip;
|
||||
*ip++ = (start += (z >> 1 ^ -(z & 1)));
|
||||
}
|
||||
#else
|
||||
BITUNZIGZAG(p, n, start);
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned bitzigzag64(unsigned *in, unsigned n, unsigned *out, unsigned start) {
|
||||
typeof(in[0]) b = 0,*op = out; long long _x; BITZIGZAG(in, n, start, b |= (unsigned long long)_x; *op++ = _x);
|
||||
unsigned bitzigzag64(uint64_t *in, unsigned n, uint64_t *out, unsigned start) {
|
||||
typeof(in[0]) b = 0,*op = out;
|
||||
long long _x;
|
||||
BITZIGZAG(in, n, start, b |= (unsigned long long)_x; *op++ = _x);
|
||||
return bsr32(b);
|
||||
}
|
||||
|
||||
void bitunzigzag64(unsigned *p, unsigned n, unsigned start) {
|
||||
void bitunzigzag64(uint64_t *p, unsigned n, unsigned start) {
|
||||
BITUNZIGZAG(p, n, start);
|
||||
}
|
||||
|
||||
//------------------- De-/Compose Floating Point -----------------------------------------
|
||||
void bitdouble(double *in, unsigned n, unsigned *sgn, unsigned *expo, uint64_t *mant) {
|
||||
void bitdouble(double *in, unsigned n, int *expo, uint64_t *mant) {
|
||||
double *ip;
|
||||
uint64_t u;
|
||||
for(ip = in; ip < in+n; ip++) {
|
||||
u = *(uint64_t *)ip; BITFLOAT(u, *sgn++, *expo++, *mant++, DMANT_BITS, 1ull);
|
||||
uint64_t u = *(uint64_t *)ip;
|
||||
*expo++ = FLTEXPO(u, DMANT_BITS, 1ull);
|
||||
*mant++ = FLTMANT(u, DMANT_BITS, 1ull);
|
||||
}
|
||||
}
|
||||
|
||||
void bitundouble(unsigned *sgn, unsigned *expo, uint64_t *mant, unsigned n, double *out) {
|
||||
void bitundouble(int *expo, uint64_t *mant, unsigned n, double *out) {
|
||||
double *op;
|
||||
uint64_t u;
|
||||
for(op = out; op < out+n; op++) {
|
||||
BITUNFLOAT((uint64_t)(*sgn++), (uint64_t)(*expo++), *mant++, u, DMANT_BITS); *op = *(double *)&u;
|
||||
for(op = out; op < out+n; ) {
|
||||
BITUNFLOAT( (int64_t)(*expo++), *mant++, u, DMANT_BITS); *op++ = *(double *)&u;
|
||||
}
|
||||
}
|
||||
|
||||
void bitfloat(float *in, unsigned n, unsigned *sgn, unsigned *expo, unsigned *mant) {
|
||||
float *ip;
|
||||
unsigned u;
|
||||
void bitzdouble(double *in, unsigned n, int *expo, uint64_t *mant) {
|
||||
double *ip;
|
||||
for(ip = in; ip < in+n; ip++) {
|
||||
u = *(unsigned *)ip; BITFLOAT(u, *sgn++, *expo++, *mant++, FMANT_BITS, 1u);
|
||||
uint64_t u = *(uint64_t *)ip;
|
||||
*expo++ = zigzagenc32((int)FLTEXPO(u, DZMANT_BITS, 1ull)-1023);
|
||||
*mant++ = FLTMANT(u, DZMANT_BITS, 1ull);
|
||||
}
|
||||
}
|
||||
|
||||
void bitunfloat(unsigned *sgn, unsigned *expo, unsigned *mant, unsigned n, float *out) {
|
||||
void bitzundouble(int *expo, uint64_t *mant, unsigned n, double *out) {
|
||||
double *op;
|
||||
uint64_t u;
|
||||
for(op = out; op < out+n; ) {
|
||||
BITUNFLOAT( (int64_t)zigzagdec32(*expo++)+1023, *mant++, u, DZMANT_BITS); *op++ = *(double *)&u;
|
||||
}
|
||||
}
|
||||
|
||||
void bitfloat(float *in, unsigned n, int *expo, unsigned *mant) {
|
||||
float *ip;
|
||||
for(ip = in; ip < in+n; ip++) {
|
||||
unsigned u = *(unsigned *)ip;
|
||||
*expo++ = FLTEXPO(u, FMANT_BITS, 1u);
|
||||
*mant++ = FLTMANT(u, FMANT_BITS, 1u);
|
||||
}
|
||||
}
|
||||
|
||||
void bitunfloat(int *expo, unsigned *mant, unsigned n, float *out) {
|
||||
float *op;
|
||||
unsigned u;
|
||||
for(op = out; op < out+n; op++) {
|
||||
BITUNFLOAT((*sgn++), (*expo++), *mant++, u, FMANT_BITS); *op = *(float *)&u;
|
||||
BITUNFLOAT( (*expo++), *mant++, u, FMANT_BITS); *op = *(float *)&u;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
149
bitutil.h
149
bitutil.h
@ -1,5 +1,5 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2013-2015
|
||||
Copyright (C) powturbo 2013-2016
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -24,89 +24,103 @@
|
||||
// bitutil.h - "Integer Compression"
|
||||
#include <stdint.h>
|
||||
|
||||
#define _BITFORZERO(out, n, start, inc) do {\
|
||||
for(i = 0; i != (n&~3); ) {\
|
||||
out[i] = start+i*inc; i++;\
|
||||
out[i] = start+i*inc; i++;\
|
||||
out[i] = start+i*inc; i++;\
|
||||
out[i] = start+i*inc; i++;\
|
||||
}\
|
||||
while(i < n) out[i] = start+i*inc,++i;\
|
||||
#define _BITFORZERO(_out_, _n_, _start_, _inc_) do { unsigned _i;\
|
||||
for(_i = 0; _i != (_n_&~3); ) {\
|
||||
_out_[_i] = _start_+_i*_inc_; _i++;\
|
||||
_out_[_i] = _start_+_i*_inc_; _i++;\
|
||||
_out_[_i] = _start_+_i*_inc_; _i++;\
|
||||
_out_[_i] = _start_+_i*_inc_; _i++;\
|
||||
}\
|
||||
while(_i != _n_)\
|
||||
_out_[_i] = _start_+_i*_inc_, ++_i;\
|
||||
} while(0)
|
||||
|
||||
#define BITSIZE(__in, __n, __b, __usize) { typeof(__in[0]) *_ip;\
|
||||
for(__b=0,_ip = __in; _ip != __in+(__n&~(4-1)); )\
|
||||
__b |= *_ip++ | *_ip++ | *_ip++ | *_ip++;\
|
||||
while(_ip != __in+__n) __b |= *_ip++;\
|
||||
__b = TEMPLATE(bsr, __usize)(__b);\
|
||||
#define BITSIZE(_in_, _n_, _b_, _usize_) { typeof(_in_[0]) *_ip;\
|
||||
for(_b_=0,_ip = _in_; _ip != _in_+(_n_&~(4-1)); )\
|
||||
_b_ |= *_ip++ | *_ip++ | *_ip++ | *_ip++;\
|
||||
while(_ip != _in_+_n_) \
|
||||
_b_ |= *_ip++;\
|
||||
_b_ = TEMPLATE(bsr, _usize_)(_b_);\
|
||||
}
|
||||
|
||||
static inline unsigned zigzagenc32(int x) { return x << 1 ^ x >> 31; }
|
||||
static inline unsigned zigzagdec32(unsigned x) { return x >> 1 ^ -(x & 1); }
|
||||
// Zigzag-style encoder variant: shifts x left by two, ORs bit 1 of (x >> 30)
// into bit 1 of the result, then XORs everything with (x >> 31).
// NOTE(review): presumably the counterpart of zigzagdec31; relies on
// arithmetic right shift of negative ints and a 32-bit int - confirm.
static inline unsigned zigzagenc31(int x) { x = (x << 2 | ((x>>30)& 2)) ^ x >> 31; return x; }
|
||||
// Decoder variant: drops the two low bits (x >> 2), moves bit 1 of x up to
// bit 31, then XORs with an all-ones mask when bit 0 of x is set (-(x & 1)).
// NOTE(review): presumably the inverse of zigzagenc31 - verify with a round trip.
static inline unsigned zigzagdec31(unsigned x) { return (x >> 2 | (x& 2)<<30 ) ^ -(x & 1); }
|
||||
|
||||
// Zigzag-encode a signed 32-bit value: 0,-1,1,-2,... map to 0,1,2,3,...
// Returns (x << 1) XOR-ed with an all-ones mask when x is negative.
static inline unsigned zigzagenc32(int x) {
  // Build the sign mask without shifting a signed value: all ones if x < 0.
  unsigned sign = 0u - (unsigned)(x < 0);
  // Shift in the unsigned domain; left-shifting a negative int is undefined.
  return ((unsigned)x << 1) ^ sign;
}
|
||||
// Zigzag-decode: the low bit selects the sign, the remaining bits hold the
// magnitude; 0,1,2,3,... map back to 0,-1,1,-2,... (as unsigned bit patterns).
static inline unsigned zigzagdec32(unsigned x) {
  unsigned magnitude = x >> 1;
  unsigned sign_mask = 0u - (x & 1u);   // all ones when the low bit is set
  return magnitude ^ sign_mask;
}
|
||||
|
||||
// Zigzag-encode a signed 64-bit value: 0,-1,1,-2,... map to 0,1,2,3,...
// Returns (x << 1) XOR-ed with an all-ones mask when x is negative.
static inline uint64_t zigzagenc64(int64_t x) {
  // Build the sign mask without shifting a signed value: all ones if x < 0.
  uint64_t sign = (uint64_t)0 - (uint64_t)(x < 0);
  // Shift in the unsigned domain; left-shifting a negative value is undefined.
  return ((uint64_t)x << 1) ^ sign;
}
|
||||
// Zigzag-decode a 64-bit value: the low bit selects the sign, the remaining
// bits hold the magnitude (result is returned as an unsigned bit pattern).
static inline uint64_t zigzagdec64(uint64_t x) {
  uint64_t magnitude = x >> 1;
  uint64_t sign_mask = (uint64_t)0 - (x & 1u);   // all ones when the low bit is set
  return magnitude ^ sign_mask;
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
// SIMD Delta
|
||||
#define DELTA128_32(_v_, _sv_) _mm_sub_epi32(_v_, _mm_or_si128(_mm_srli_si128(_sv_, 12), _mm_slli_si128(_v_, 4)))
|
||||
|
||||
#define DELTA128_32(__v, __sv) _mm_sub_epi32(__v, _mm_or_si128(_mm_srli_si128(__sv, 12), _mm_slli_si128(__v, 4)))
|
||||
// SIMD Scan ( prefix sum )
|
||||
#define SCAN128_32( _v_, _sv_) _v_ = _mm_add_epi32(_v_, _mm_slli_si128(_v_, 4)); _sv_ = _mm_add_epi32(_mm_shuffle_epi32(_sv_, _MM_SHUFFLE(3, 3, 3, 3)), _mm_add_epi32(_mm_slli_si128(_v_, 8), _v_) )
|
||||
#define SCANI128_32(_v_, _sv_, _vi_) SCAN128_32(_v_, _sv_); _sv_ = _mm_add_epi32(_sv_, _vi_)
|
||||
|
||||
#define SCAN128_32( __v, __sv) __v = _mm_add_epi32(__v, _mm_slli_si128(__v, 4)); __sv = _mm_add_epi32(_mm_shuffle_epi32(__sv, _MM_SHUFFLE(3, 3, 3, 3)), _mm_add_epi32(_mm_slli_si128(__v, 8), __v) )
|
||||
#define SCANI128_32(__v, __sv, __vi) SCAN128_32(__v, __sv); __sv = _mm_add_epi32(__sv, __vi)
|
||||
// SIMD ZigZag
|
||||
#define ZIGZAG128_32(_v_) _mm_xor_si128(_mm_slli_epi32(_v_,1), _mm_srai_epi32(_v_,31))
|
||||
#define UNZIGZAG128_32(_v_) _mm_xor_si128(_mm_srli_epi32(_v_,1), _mm_srai_epi32(_mm_slli_epi32(_v_,31),31) ) //_mm_sub_epi32(cz, _mm_and_si128(iv,c1))
|
||||
|
||||
#define ZIGZAG128_32(__v) _mm_xor_si128(_mm_slli_epi32(__v,1), _mm_srai_epi32(__v,31))
|
||||
#define UNZIGZAG128_32(__v) _mm_xor_si128(_mm_srli_epi32(__v,1), _mm_srai_epi32(_mm_slli_epi32(__v,31),31) ) //_mm_sub_epi32(cz, _mm_and_si128(iv,c1))
|
||||
// SIMD Horizontal OR
|
||||
#define HOR128_32(__v,__b) __v = _mm_or_si128(__v, _mm_srli_si128(__v, 8)); __v = _mm_or_si128(__v, _mm_srli_si128(__v, 4)); __b = (unsigned)_mm_cvtsi128_si32(__v)
|
||||
#define HOR128_32(_v_,_b_) _v_ = _mm_or_si128(_v_, _mm_srli_si128(_v_, 8)); _v_ = _mm_or_si128(_v_, _mm_srli_si128(_v_, 4)); _b_ = (unsigned)_mm_cvtsi128_si32(_v_)
|
||||
|
||||
#define BITSIZE32(__in, __n, __b) { typeof(__in[0]) *_ip; __m128i v = _mm_setzero_si128();\
|
||||
for(_ip = __in; _ip != __in+(__n&~(4-1)); _ip+=4) v = _mm_or_si128(v, _mm_loadu_si128((__m128i*)_ip));\
|
||||
HOR128_32(v,__b);\
|
||||
while(_ip != __in+__n) __b |= *_ip++;\
|
||||
__b = bsr32(__b);\
|
||||
#define BITSIZE32(_in_, _n_, _b_) { typeof(_in_[0]) *_ip; __m128i _v = _mm_setzero_si128();\
|
||||
for(_ip = _in_; _ip != _in_+(_n_&~(4-1)); _ip+=4)\
|
||||
_v = _mm_or_si128(_v, _mm_loadu_si128((__m128i*)_ip));\
|
||||
HOR128_32(_v,_b_);\
|
||||
while(_ip != _in_+_n_)\
|
||||
_b_ |= *_ip++;\
|
||||
_b_ = bsr32(_b_);\
|
||||
}
|
||||
|
||||
#define BITZERO32(out, n, start) do {\
|
||||
__m128i sv = _mm_set1_epi32(start), *ov = (__m128i *)(out), *ove = (__m128i *)(out + n);\
|
||||
do { _mm_storeu_si128(ov++, sv); } while(ov < ove); \
|
||||
// SIMD set value
|
||||
#define BITZERO32(_out_, _n_, _start_) do {\
|
||||
__m128i _sv_ = _mm_set1_epi32(_start_), *_ov = (__m128i *)(_out_), *_ove = (__m128i *)(_out_ + _n_);\
|
||||
do _mm_storeu_si128(_ov++, _sv_); while(_ov < _ove); \
|
||||
} while(0)
|
||||
|
||||
#define BITFORZERO32(out, n, start, inc) do {\
|
||||
__m128i sv = _mm_set1_epi32(start), *ov=(__m128i *)(out), *ove = (__m128i *)(out + n), cv = _mm_set_epi32(3*inc,2*inc,1*inc,0); \
|
||||
sv = _mm_add_epi32(sv, cv);\
|
||||
cv = _mm_set1_epi32(4);\
|
||||
do { _mm_storeu_si128(ov++, sv); sv = _mm_add_epi32(sv, cv); } while(ov < ove);\
|
||||
// SIMD fill of an arithmetic progression: _out_[i] = _start_ + i*_inc_.
//   _out_   : destination array of 32-bit integers
//   _n_     : number of elements requested
//   _start_ : value of element 0
//   _inc_   : difference between consecutive elements
// Stores whole 4-element vectors with a do/while loop, so at least one vector
// is always written and the written count is rounded up to a multiple of 4;
// the destination must have room for that.
// Each vector holds 4 consecutive elements, so the per-iteration step must be
// 4*_inc_ (matching the scalar _BITFORZERO), not the constant 4.
#define BITFORZERO32(_out_, _n_, _start_, _inc_) do {\
  __m128i _sv = _mm_set1_epi32(_start_), *_ov=(__m128i *)(_out_), *_ove = (__m128i *)((_out_) + (_n_)), _cv = _mm_set_epi32(3*(_inc_),2*(_inc_),1*(_inc_),0); \
  _sv = _mm_add_epi32(_sv, _cv);\
  _cv = _mm_set1_epi32(4*(_inc_));\
  do { _mm_storeu_si128(_ov++, _sv); _sv = _mm_add_epi32(_sv, _cv); } while(_ov < _ove);\
} while(0)
|
||||
|
||||
#define BITDIZERO32(out, n, start, inc) do { __m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(3+inc,2+inc,1+inc,inc), *ov=(__m128i *)(out), *ove = (__m128i *)(out + n);\
|
||||
sv = _mm_add_epi32(sv, cv); cv = _mm_set1_epi32(4*inc); do { _mm_storeu_si128(ov++, sv), sv = _mm_add_epi32(sv, cv); } while(ov < ove);\
|
||||
#define BITDIZERO32(_out_, _n_, _start_, _inc_) do { __m128i _sv = _mm_set1_epi32(_start_), _cv = _mm_set_epi32(3+_inc_,2+_inc_,1+_inc_,_inc_), *_ov=(__m128i *)(_out_), *_ove = (__m128i *)(_out_ + _n_);\
|
||||
_sv = _mm_add_epi32(_sv, _cv); _cv = _mm_set1_epi32(4*_inc_); do { _mm_storeu_si128(_ov++, _sv), _sv = _mm_add_epi32(_sv, _cv); } while(_ov < _ove);\
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
#define BITSIZE32(__in, __n, __b) BITSIZE(__in, __n, __b, 32)
|
||||
#define BITFORZERO32(out, n, start, inc) _BITFORZERO(out, n, start, inc)
|
||||
#define BITZERO32(out, n, start) _BITFORZERO(out, n, start, 0)
|
||||
#define BITSIZE32(_in_, _n_, _b_) BITSIZE(_in_, _n_, _b_, 32)
|
||||
#define BITFORZERO32(_out_, _n_, _start_, _inc_) _BITFORZERO(_out_, _n_, _start_, _inc_)
|
||||
#define BITZERO32(_out_, _n_, _start_) _BITFORZERO(_out_, _n_, _start_, 0)
|
||||
#endif
|
||||
|
||||
|
||||
#define DELTR( __in, __n, __mode, __out) { unsigned _v; for( __out[0]=__in[0],_v = 1; _v < __n; _v++) __out[_v] = (__in[_v] - __out[0]) - _v*__mode; }
|
||||
#define DELTRB(__in, __n, __mode, __b, __out) { unsigned _v; for(__b=0,__out[0]=__in[0],_v = 1; _v < __n; _v++) __out[_v] = (__in[_v] - __out[0]) - _v*__mode, __b |= __out[_v]; __b = bsr32(__b); }
|
||||
#define DELTR( _in_, _n_, _mode_, _out_) { unsigned _v; for( _out_[0]=_in_[0],_v = 1; _v < _n_; _v++) _out_[_v] = (_in_[_v] - _out_[0]) - _v*_mode_; }
|
||||
#define DELTRB(_in_, _n_, _mode_, _b_, _out_) { unsigned _v; for(_b_=0,_out_[0]=_in_[0],_v = 1; _v < _n_; _v++) _out_[_v] = (_in_[_v] - _out_[0]) - _v*_mode_, _b_ |= _out_[_v]; _b_ = bsr32(_b_); }
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// get maximum bit length of the elements in the integer array
|
||||
//------------- get maximum bit length of the elements in the integer array -----------------------
|
||||
unsigned bit32( unsigned *in, unsigned n);
|
||||
|
||||
// transform sorted integer array to delta array. inc = increment
|
||||
//------------- Delta for sorted integer array ----------------------------------------------------
|
||||
//-- transform sorted integer array to delta array. inc = increment: out[i] = in[i] - in[i-1] - inc
|
||||
unsigned bitdelta32(unsigned *in, unsigned n, unsigned *out, unsigned start, unsigned inc);
|
||||
unsigned bitdelta64(uint64_t *in, unsigned n, uint64_t *out, uint64_t start, unsigned inc);
|
||||
|
||||
// get delta maximum bit length of the non decreasing integer array
|
||||
//-- get delta maximum bit length of the non decreasing integer array. out[i] = in[i] - in[i-1]
|
||||
unsigned bitd32( unsigned *in, unsigned n, unsigned start);
|
||||
|
||||
// get delta maximum bit length of the strictly increasing integer array
|
||||
//-- get delta maximum bit length of the strictly increasing integer array. out[i] = in[i] - in[i-1] - 1
|
||||
unsigned bitd132( unsigned *in, unsigned n, unsigned start);
|
||||
|
||||
//-- in-place reverse delta transform
|
||||
void bitund32( unsigned *p, unsigned n, unsigned x);
|
||||
void bitund64( uint64_t *p, unsigned n, uint64_t x);
|
||||
|
||||
@ -115,32 +129,47 @@ void bitundx64( uint64_t *p, unsigned n, uint64_t x, unsigned inc);
|
||||
|
||||
void bitund132( unsigned *p, unsigned n, unsigned x);
|
||||
|
||||
// for
|
||||
//------------- FOR array bit length: out[i] = in[i] - start -------------------------------------
|
||||
|
||||
unsigned bitf32( unsigned *in, unsigned n, unsigned start); // sorted
|
||||
unsigned bitf132( unsigned *in, unsigned n, unsigned start);
|
||||
unsigned bitfm32( unsigned *in, unsigned n, unsigned *pmin); // unsorted
|
||||
unsigned bitf1m32( unsigned *in, unsigned n, unsigned *pmin);
|
||||
|
||||
// zigzag encoding for unsorted integer lists
|
||||
//------------- Zigzag encoding for unsorted integer lists: out[i] = in[i] - in[i-1] -------------
|
||||
|
||||
//-- get maximum zigzag bit length integer array
|
||||
unsigned bitz32( unsigned *in, unsigned n, unsigned start);
|
||||
|
||||
//-- Zigzag transform
|
||||
unsigned bitzigzag32(unsigned *in, unsigned n, unsigned *out, unsigned start);
|
||||
unsigned bitzigzag64(unsigned *in, unsigned n, unsigned *out, unsigned start);
|
||||
unsigned bitzigzag64(uint64_t *in, unsigned n, uint64_t *out, unsigned start);
|
||||
|
||||
//-- Zigzag reverse transform
|
||||
void bitunzigzag32( unsigned *p, unsigned n, unsigned start);
|
||||
void bitunzigzag64( unsigned *p, unsigned n, unsigned start);
|
||||
void bitunzigzag64( uint64_t *p, unsigned n, unsigned start);
|
||||
|
||||
//---- Floating point to Integer de-/composition ---------------------------------
|
||||
#define FMANT_BITS 16
|
||||
#define DMANT_BITS 32
|
||||
#define DZMANT_BITS 36
|
||||
|
||||
#define FMANT_BITS 23
|
||||
#define DMANT_BITS 52
|
||||
|
||||
#define BITFLOAT(__u, __sgn, __expo, __mant, __mantbits, __one) __sgn = __u >> (sizeof(__u)*8-1); __expo = ((__u >> (__mantbits)) & ( (__one<<(sizeof(__u)*8 - 1 - __mantbits)) -1)); __mant = __u & ((__one<<__mantbits)-1);
|
||||
#define BITUNFLOAT( __sgn, __expo, __mant, __u, __mantbits) __u = (__sgn) << (sizeof(__u)*8-1) | (__expo) << __mantbits | (__mant)
|
||||
#define FLTEXPO(__u,__mantbits, __one) ( ((__u) >> __mantbits) & ( (__one<<(sizeof(__u)*8 - __mantbits)) - 1 ) )
|
||||
#define FLTMANT(__u,__mantbits, __one) ((__u) & ((__one<<__mantbits)-1))
|
||||
|
||||
#define BITUNFLOAT(__expo, __mant, __u, __mantbits) __u = ((__expo) << __mantbits) | (__mant)//>>1 | (__mant)<<(sizeof(__u)*8 - 1)
|
||||
|
||||
/*#define BITFLOAT(__u, __sgn, __expo, __mant, __mantbits, __one) __sgn = __u >> (sizeof(__u)*8-1); __expo = EXPO(__u,__mantbits; __mant = __u & ((__one<<__mantbits)-1)
|
||||
#define BITUNFLOAT( __sgn, __expo, __mant, __u, __mantbits) __u = (__sgn) << (sizeof(__u)*8-1) | (__expo) << __mantbits | (__mant) */
|
||||
|
||||
// De-/Compose floating point array to/from integer arrays (sign,exponent,mantissa) for using with "Integer Compression" functions ------------
|
||||
void bitdouble( double *in, unsigned n, unsigned *sgn, unsigned *expo, uint64_t *mant);
|
||||
void bitundouble( unsigned *sgn, unsigned *expo, uint64_t *mant, unsigned n, double *out);
|
||||
void bitfloat( float *in, unsigned n, unsigned *sgn, unsigned *expo, unsigned *mant);
|
||||
void bitunfloat( unsigned *sgn, unsigned *expo, unsigned *mant, unsigned n, float *out);
|
||||
void bitdouble( double *in, unsigned n, int *expo, uint64_t *mant);
|
||||
void bitundouble( int *expo, uint64_t *mant, unsigned n, double *out);
|
||||
void bitzdouble( double *in, unsigned n, int *expo, uint64_t *mant);
|
||||
void bitzundouble( int *expo, uint64_t *mant, unsigned n, double *out);
|
||||
void bitfloat( float *in, unsigned n, int *expo, unsigned *mant);
|
||||
void bitunfloat( int *expo, unsigned *mant, unsigned n, float *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
20
conf.h
20
conf.h
@ -38,13 +38,19 @@
|
||||
#define popcnt64(_x_) __builtin_popcountll(_x_)
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
static inline int __bsr32(int x) { asm("bsr %1,%0" : "=r" (x) : "rm" (x) ); return x; }
|
||||
static inline int bsr32( int x) { int b = -1; asm("bsrl %1,%0" : "+r" (b) : "rm" (x) ); return b + 1; }
|
||||
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
|
||||
#define bsr16(_x_) bsr32(_x_)
|
||||
static inline int __bsr32( int x) { asm("bsr %1,%0" : "=r" (x) : "rm" (x) ); return x; }
|
||||
static inline int bsr32( int x) { int b = -1; asm("bsrl %1,%0" : "+r" (b) : "rm" (x) ); return b + 1; }
|
||||
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
|
||||
#define bsr16(_x_) bsr32(_x_)
|
||||
|
||||
static inline unsigned rol32(unsigned x, int s) { asm ("roll %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
|
||||
static inline unsigned ror32(unsigned x, int s) { asm ("rorl %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
|
||||
|
||||
#else
|
||||
static inline int bsr32(int x ) { return x?32 - __builtin_clz( x):0; }
|
||||
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
|
||||
static inline int bsr32(int x ) { return x?32 - __builtin_clz( x):0; }
|
||||
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
|
||||
// Rotate x left by s bits (portable fallback). The count is reduced modulo 32 so that
// s == 0 never turns into a shift by the full word width, which is undefined behaviour in C.
static inline unsigned rol32(unsigned x, int s) { unsigned r = (unsigned)s & 31; return x << r | x >> ((32 - r) & 31); }
|
||||
// Rotate x right by s bits (portable fallback). The count is reduced modulo 32 so that
// s == 0 never turns into a shift by the full word width, which is undefined behaviour in C.
static inline unsigned ror32(unsigned x, int s) { unsigned r = (unsigned)s & 31; return x >> r | x << ((32 - r) & 31); }
|
||||
#endif
|
||||
|
||||
#define ctz64(_x_) __builtin_ctzll(_x_)
|
||||
@ -65,6 +71,8 @@ static inline int bsr64(unsigned long long x) { unsigned long z = 0; _BitScanFor
|
||||
static inline int ctz64(unsigned long long x) { unsigned long z = 0; _BitScanForward64(&z, x); return z; }
|
||||
#endif
|
||||
// Index of the lowest set bit of x, obtained with the _BitScanForward intrinsic.
// The intrinsic writes its result through an unsigned long *, so the local is unsigned long (as in ctz64).
static inline int ctz32(unsigned x) { unsigned long z = 0; _BitScanForward(&z, x); return (int)z; }
|
||||
#define rol32(x,s) _lrotl(x, s)
|
||||
#define ror32(x,s) _lrotr(x, s)
|
||||
#define fseeko _fseeki64
|
||||
#define ftello _ftelli64
|
||||
// Map a seconds-based sleep() onto Win32 Sleep(), which takes milliseconds: scale up by 1000.
// The argument is parenthesized so expressions such as sleep(a + b) expand correctly.
// NOTE(review): assumes callers pass seconds (POSIX sleep semantics) - confirm against call sites.
#define sleep(x) Sleep((x) * 1000)
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2013-2015
|
||||
Copyright (C) powturbo 2013-2016
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -1080,8 +1080,8 @@ int main(int argc, char *argv[]) { int r;
|
||||
uint64_t *mantissa = malloc(n*sizeof(mantissa[0]));
|
||||
unsigned *sign = malloc(n*sizeof(sign[0]));
|
||||
unsigned *exp = malloc(n*sizeof(exp[0])); if(!mantissa || !exp || !sign || !dcpy) die("alloc error\n");
|
||||
bitdouble( din, n, sign, exp, mantissa);
|
||||
bitundouble( sign, exp, mantissa, n, dcpy);
|
||||
bitdouble( din, n, exp, mantissa);
|
||||
bitundouble( exp, mantissa, n, dcpy);
|
||||
int i; for(i=0;i < n; i++) { printf("%d,%d,%llu,%e,%e\n", sign[i], exp[i],(long long unsigned int)mantissa[i], din[i], dcpy[i]); if(din[i]!=dcpy[i]) die("check error at %d %e %e\n", i, din[i], dcpy[i]); }
|
||||
free(din); free(mantissa); free(exp); free(sign); free(dcpy);
|
||||
exit(0);
|
||||
|
||||
186
vint.c
186
vint.c
@ -1,5 +1,5 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2013-2015
|
||||
Copyright (C) powturbo 2013-2016
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -22,67 +22,45 @@
|
||||
- email : powturbo [_AT_] gmail [_DOT_] com
|
||||
**/
|
||||
// vint.c - "Integer Compression" variable byte
|
||||
#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "conf.h"
|
||||
#include "vint.h"
|
||||
#include "bitutil.h"
|
||||
|
||||
#define _vbputu32(__op, __x, __act) {\
|
||||
if(likely(__x < (1<< 7))) { *__op++ = __x << 1; __act;}\
|
||||
else if(likely(__x < (1<<14))) { *(unsigned short *)__op = __x << 2 | 0x01; __op += 2; __act;}\
|
||||
else if(likely(__x < (1<<21))) { *(unsigned *)__op = __x << 3 | 0x03; __op += 3; __act;}\
|
||||
else if(likely(__x < (1<<28))) { *(unsigned *)__op = __x << 4 | 0x07; __op += 4; __act;}\
|
||||
else { *(unsigned *)__op = __x << 4 | 0x0f; __op += 4; *__op++ = __x >> 28; __act;}\
|
||||
}
|
||||
|
||||
#define _vbgetu32(__ip, __x, __act) do {\
|
||||
if(!((__x = *__ip) & (1<<0))) { __ip++; __x >>= 1; __act;}\
|
||||
else if(!(__x & (1<<1))) { __x = (*(unsigned short *)__ip) >> 2; __ip += 2; __act;}\
|
||||
else if(!(__x & (1<<2))) { __x = (*(unsigned *)__ip & 0xffffffu) >> 3; __ip += 3; __act;}\
|
||||
else if(!(__x & (1<<3))) { __x = (*(unsigned *)__ip) >> 4; __ip += 4; __act;}\
|
||||
else { __x = (unsigned long long)(*(unsigned *)__ip) >> 4 | (unsigned long long)(__ip[4]) << 28; __ip += 5; __act;}\
|
||||
} while(0)
|
||||
|
||||
#define vbputu32(__op, __x) { unsigned _x_ = __x; _vbputu32(__op, _x_, ;); }
|
||||
|
||||
//-------------------------------------- variable byte : 32 bits ----------------------------------------------------------------
|
||||
#if defined(__AVX2__) && defined(__AVX2__VINT)
|
||||
#include <immintrin.h>
|
||||
#define M1 0xfeull //7
|
||||
#define M2 0xfffcull //14
|
||||
#define M3 0xfffff8ull //21
|
||||
#define M4 0xfffffff0ull //28
|
||||
#define M5 0xfffffffff0ull //36
|
||||
|
||||
//0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
|
||||
unsigned long long mtab[] = { M1, M2, M1, M3, M1, M2, M1, M4, M1, M2, M1, M3, M1, M2, M1, M5 };
|
||||
#endif
|
||||
//------------------------------------------------------------------------------------------------------------------------
|
||||
//0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
|
||||
unsigned char vtab[] = { 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5 };
|
||||
// Encoded length in bytes, indexed by the high nibble of the first encoded byte (vbvlen32 uses byte>>4).
// Prefix codes written by _vbput32: 0xxx -> 1 byte, 10xx -> 2, 110x -> 3, 1110 -> 4, 1111 -> 5.
unsigned char vtab[] = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5 };
|
||||
|
||||
// decompress buffer in into an array of n unsigned values. Return value = end of the compressed input buffer in
|
||||
unsigned char *vbdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out) { unsigned x,*op;
|
||||
for(op = out; op != out+(n&~(4-1)); op += 4) {
|
||||
_vbgetu32(in, x, op[0] = x);
|
||||
_vbgetu32(in, x, op[1] = x);
|
||||
_vbgetu32(in, x, op[2] = x);
|
||||
_vbgetu32(in, x, op[3] = x);
|
||||
unsigned char *vbdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out) { register unsigned x, *op;
|
||||
for(op = out; op != out+(n&~(8-1)); op += 8) {
|
||||
_vbget32(in, x, op[0] = x);
|
||||
_vbget32(in, x, op[1] = x);
|
||||
_vbget32(in, x, op[2] = x);
|
||||
_vbget32(in, x, op[3] = x); __builtin_prefetch(in+256, 0);
|
||||
_vbget32(in, x, op[4] = x);
|
||||
_vbget32(in, x, op[5] = x);
|
||||
_vbget32(in, x, op[6] = x);
|
||||
_vbget32(in, x, op[7] = x);
|
||||
}
|
||||
while(op != out+n) { _vbgetu32(in, x, ; ); *op++ = x; }
|
||||
while(op != out+n) _vbget32(in, x, *op++ = x );
|
||||
return in;
|
||||
}
|
||||
|
||||
// encode array with n unsigned (32 bits in[n]) values to the buffer out. Return value = end of compressed buffer out
|
||||
unsigned char *vbenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out) { unsigned *ip;
|
||||
for(ip = in; ip != in+(n&~(4-1)); ) {
|
||||
vbputu32(out, *ip++);
|
||||
vbputu32(out, *ip++);
|
||||
vbputu32(out, *ip++);
|
||||
vbputu32(out, *ip++);
|
||||
unsigned char *vbenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out) { register unsigned x, *ip;
|
||||
for(ip = in; ip != in+(n&~(8-1)); ip += 8) { __builtin_prefetch(ip+128, 0);
|
||||
x = ip[0]; _vbput32(out, x, ;);
|
||||
x = ip[1]; _vbput32(out, x, ;);
|
||||
x = ip[2]; _vbput32(out, x, ;);
|
||||
x = ip[3]; _vbput32(out, x, ;);
|
||||
x = ip[4]; _vbput32(out, x, ;);
|
||||
x = ip[5]; _vbput32(out, x, ;);
|
||||
x = ip[6]; _vbput32(out, x, ;);
|
||||
x = ip[7]; _vbput32(out, x, ;);
|
||||
}
|
||||
while(ip != in+n) vbputu32(out, *ip++);
|
||||
while(ip != in+n) { x = *ip++; _vbput32(out, x, ;); }
|
||||
return out;
|
||||
}
|
||||
|
||||
@ -113,28 +91,28 @@ unsigned char *vbenc64(uint64_t *__restrict in, unsigned n, unsigned char *__res
|
||||
unsigned char *vbdenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start) {
|
||||
unsigned *ip,v;
|
||||
for(ip = in; ip != in+(n&~(4-1)); ) {
|
||||
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
|
||||
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
|
||||
}
|
||||
while(ip < in+n) { v = (*ip)-start; start = *ip++; _vbputu32(out, v, ;); }
|
||||
while(ip < in+n) { v = (*ip)-start; start = *ip++; _vbput32(out, v, ;); }
|
||||
return out;
|
||||
}
|
||||
|
||||
unsigned char *vbddec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start) {
|
||||
unsigned x,*op;
|
||||
for(op = out; op != out+(n&~(8-1)); ) {
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
_vbget32(in, x, ;); *op++ = (start += x);
|
||||
}
|
||||
while(op != out+n) _vbgetu32(in, x, *op++ = (start += x));
|
||||
while(op != out+n) _vbget32(in, x, *op++ = (start += x));
|
||||
return in;
|
||||
}
|
||||
|
||||
@ -147,21 +125,21 @@ unsigned char *vbd1enc32(unsigned *__restrict in, unsigned n, unsigned char *__r
|
||||
v = in[0] - start - 1;
|
||||
unsigned long long u = (unsigned long long)v<<1;
|
||||
if(n == 1) u |= 1;
|
||||
_vbputu32(op, u, ;);
|
||||
_vbput32(op, u, ;);
|
||||
if(!--n) return op;
|
||||
start = *in++;
|
||||
#endif
|
||||
for(ip = in; ip != in + (n&~(4-1)); ) {
|
||||
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
|
||||
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
|
||||
}
|
||||
while(ip != in+n) { v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v; }
|
||||
while(ip != in+n) { v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v; }
|
||||
#ifdef VINT_Z
|
||||
if(!b) {
|
||||
u = (unsigned long long)in[-1] << 1 | 1;
|
||||
_vbputu32(out, u, ;);
|
||||
_vbput32(out, u, ;);
|
||||
return out;
|
||||
}
|
||||
#endif
|
||||
@ -171,7 +149,7 @@ unsigned char *vbd1enc32(unsigned *__restrict in, unsigned n, unsigned char *__r
|
||||
unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start) {
|
||||
unsigned x,*op;
|
||||
#ifdef VINT_Z
|
||||
unsigned long long u; _vbgetu32(in, u, ;); x = u>>1; *out = (start += x+1);
|
||||
unsigned long long u; _vbget32(in, u, ;); x = u>>1; *out = (start += x+1);
|
||||
if(u & 1) {
|
||||
#ifdef __SSE2__
|
||||
out++; --n; BITDIZERO32(out, n, start, 1);
|
||||
@ -184,16 +162,16 @@ unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned *__r
|
||||
#endif
|
||||
|
||||
for(op = out; op != out+(n&~(8-1)); ) {
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbgetu32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
_vbget32(in, x, ++x); *op++ = (start += x);
|
||||
}
|
||||
while(op != out+n) { _vbgetu32(in, x, ++x); *op++ = (start += x); }
|
||||
while(op != out+n) { _vbget32(in, x, ++x); *op++ = (start += x); }
|
||||
return in;
|
||||
}
|
||||
|
||||
@ -208,27 +186,51 @@ unsigned char *vbdec16(unsigned char *__restrict in, unsigned n, unsigned short
|
||||
unsigned char *vbzenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start) {
|
||||
unsigned *ip,v;
|
||||
for(ip = in; ip != in+(n&~(4-1)); ) {
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
|
||||
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
|
||||
}
|
||||
while(ip < in+n) { v = zigzagenc32((*ip)-start); start = *ip++; _vbputu32(out, v, ;); }
|
||||
while(ip < in+n) { v = zigzagenc32((*ip)-start); start = *ip++; _vbput32(out, v, ;); }
|
||||
return out;
|
||||
}
|
||||
|
||||
unsigned char *vbzdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start) {
|
||||
unsigned x,*op;
|
||||
for(op = out; op != out+(n&~(8-1)); ) {
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
|
||||
}
|
||||
while(op != out+n) _vbgetu32(in, x, *op++ = (start += zigzagdec32(x)));
|
||||
while(op != out+n) _vbget32(in, x, *op++ = (start += zigzagdec32(x)));
|
||||
return in;
|
||||
}
|
||||
|
||||
unsigned char *vbzenc64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start) {
|
||||
uint64_t *ip,v;
|
||||
for(ip = in; ip != in+(n&~(4-1)); ) {
|
||||
v = zigzagenc64((*ip)-start); start=*ip++; _vbput64(out, v, ;);
|
||||
v = zigzagenc64((*ip)-start); start=*ip++; _vbput64(out, v, ;);
|
||||
v = zigzagenc64((*ip)-start); start=*ip++; _vbput64(out, v, ;);
|
||||
v = zigzagenc64((*ip)-start); start=*ip++; _vbput64(out, v, ;);
|
||||
}
|
||||
while(ip < in+n) { v = zigzagenc64((*ip)-start); start = *ip++; _vbput64(out, v, ;); }
|
||||
return out;
|
||||
}
|
||||
|
||||
unsigned char *vbzdec64(unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start) {
|
||||
uint64_t x,*op;
|
||||
for(op = out; op != out+(n&~(4-1)); ) {
|
||||
_vbget64(in, x, ;); *op++ = (start += zigzagdec64(x));
|
||||
_vbget64(in, x, ;); *op++ = (start += zigzagdec64(x));
|
||||
_vbget64(in, x, ;); *op++ = (start += zigzagdec64(x));
|
||||
_vbget64(in, x, ;); *op++ = (start += zigzagdec64(x));
|
||||
}
|
||||
while(op != out+n) _vbget64(in, x, *op++ = (start += zigzagdec64(x)));
|
||||
return in;
|
||||
}
|
||||
|
||||
48
vint.h
48
vint.h
@ -1,5 +1,5 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2013-2015
|
||||
Copyright (C) powturbo 2013-2016
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -31,39 +31,31 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//--------- 32 bits ------------------
|
||||
//--------------------------- 32 bits ---------------------------------------------------------------------------------------
|
||||
extern unsigned char vtab[];
|
||||
#define vbvlen32(__x) vtab[(__x)&0xf]
|
||||
#define vbvlen32(__x) vtab[((unsigned char)(__x))>>4]
|
||||
|
||||
#define _vbput32(__op, __x, __act) {\
|
||||
if(likely(__x < (1<< 7))) { *__op++ = __x << 1; __act;}\
|
||||
else if(likely(__x < (1<<14))) { *(unsigned short *)__op = __x << 2 | 0x01; __op += 2; __act;}\
|
||||
else if(likely(__x < (1<<21))) { *(unsigned short *)__op = __x << 3 | 0x03; __op += 2; *__op++ = __x >> 13; __act;}\
|
||||
else if(likely(__x < (1<<28))) { *(unsigned *)__op = __x << 4 | 0x07; __op += 4; __act;}\
|
||||
else { *(unsigned *)__op = __x << 4 | 0x0f; __op += 4; *__op++ = __x >> 28; __act;}\
|
||||
if(likely(__x < (1<< 7))) { *__op++ = __x; __act;}\
|
||||
else if(likely(__x < (1<<14))) { ctou16(__op) = __x << 8 | __x >> 8 | 0x80; __op += 2; __act;}\
|
||||
else if(likely(__x < (1<<21))) { *__op++ = __x >> 16 | 0xc0; ctou32(__op) = __x; __op += 2; __act;}\
|
||||
else if(likely(__x < (1<<28))) { ctou32(__op) = rol32(__x,8) | 0xe0; __op += 4; __act;}\
|
||||
else { *__op++ = (unsigned long long)__x >> 32 | 0xf0; ctou32(__op) = __x; __op += 4; __act;}\
|
||||
}
|
||||
|
||||
//#define __AVX2__VINT
|
||||
#if defined(__AVX2__) && defined(__AVX2__VINT)
|
||||
#include <immintrin.h>
|
||||
|
||||
extern unsigned long long mtab[];
|
||||
|
||||
#define _vbget32(__ip, __x, __act) do { unsigned _vdx=(*__ip)&0xf; __x = _pext_u64(*(unsigned long long *)__ip, mtab[_vdx]); __ip+=vtab[_vdx]; __act; } while(0)
|
||||
#else
|
||||
#define _vbget32(__ip, __x, __act) do {\
|
||||
if(!((__x = *__ip) & (1<<0))) { __ip++; __x >>= 1; __act;}\
|
||||
else if(!(__x & (1<<1))) { __x = (*(unsigned short *)__ip) >> 2; __ip += 2; __act;}\
|
||||
else if(!(__x & (1<<2))) { __x = (*(unsigned short *)__ip) >> 3 | (unsigned)(*(__ip+2)) << 13; __ip += 3; __act;}\
|
||||
else if(!(__x & (1<<3))) { __x = (*(unsigned *)__ip) >> 4; __ip += 4; __act;}\
|
||||
else { __x = (unsigned long long)(*(unsigned *)__ip) >> 4 | (unsigned long long)(__ip[4]) << 28; __ip += 5; __act;}\
|
||||
#define _vbget32(__ip, __x, __act) do { __x = *__ip++;\
|
||||
if(!(__x & 0x80)) { __act;}\
|
||||
else if(!(__x & 0x40)) { __x = (__x & 0x3f)<< 8 | *__ip++; __act;}\
|
||||
else if(!(__x & 0x20)) { __x = (__x & 0x1f)<<16 | ctou16(__ip); __ip += 2; __act;}\
|
||||
else if(!(__x & 0x10)) { __x = ror32(ctou32(__ip-1),8) & 0xfffffff; __ip += 3; __act;}\
|
||||
else { __x = (unsigned long long)(__x & 0x07)<<32 | ctou32(__ip); __ip += 4; __act;}\
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
//----------------- 16 bits --------------------------
|
||||
//----------------- 16 bits -------------------------------------------------------------------------------------------------------
|
||||
// 16-bit put reuses the 32-bit encoder. _vbput32 takes three arguments (op, x, act), so the
// trailing action argument is forwarded; previously only two were passed and any use failed to expand.
#define _vbput16(__op, __x, ...) _vbput32(__op, __x, __VA_ARGS__)
|
||||
#define _vbget16(__ip, __x, __act) _vbget32(__ip, __x, __act)
|
||||
//----------------- 64 bits --------------------------
|
||||
|
||||
//----------------- 64 bits -------------------------------------------------------------------------------------------------------
|
||||
#define _vbput64(__op, __x, __act) {\
|
||||
if(__x < 1 << 7) { *__op++ = __x << 1; __act;}\
|
||||
else if(__x < 1 <<14) { *(unsigned short *)__op = __x << 2 | 0x01; __op += 2; __act;}\
|
||||
@ -96,8 +88,8 @@ extern unsigned long long mtab[];
|
||||
#define vbput16(__op, __x) vbput32(__op, __x)
|
||||
#define vbget16(__ip) vbget32(__ip)
|
||||
|
||||
#define vbput32(__op, __x) { unsigned _x_ = __x; _vbput32(__op, _x_, ;); }
|
||||
#define vbget32(__ip) ({ unsigned _x_; _vbget32(__ip, _x_, ;); _x_; })
|
||||
#define vbput32(__op, __x) { register unsigned _x_ = __x; _vbput32(__op, _x_, ;); }
|
||||
#define vbget32(__ip) ({ register unsigned _x_; _vbget32(__ip, _x_, ;); _x_; })
|
||||
|
||||
#define vbput64(__op, __x) { unsigned long long _x_ = __x; _vbput64(__op, _x_, ;); }
|
||||
#define vbget64(__ip) ({ unsigned long long _x_; _vbget64(__ip, _x_, ;); _x_; })
|
||||
@ -122,6 +114,8 @@ unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned
|
||||
//------ zigzag encoding integer lists -------------------------------------------------------------
|
||||
unsigned char *vbzenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start);
|
||||
unsigned char *vbzdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start);
|
||||
unsigned char *vbzenc64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start);
|
||||
unsigned char *vbzdec64(unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
|
||||
|
||||
//--- 15 bits integer lists ------------
|
||||
#define vbput15(__op, __x) do { unsigned _x = __x; if(likely(_x < 0x80)) *__op++ = _x; else { *__op++ = (_x) >> 8 | 0x80; *__op++ = _x; } } while(0)
|
||||
|
||||
Reference in New Issue
Block a user