This commit is contained in:
powturbo
2016-04-17 15:56:51 +02:00
parent 62fb4b0115
commit ab6f60b2d8
9 changed files with 337 additions and 259 deletions

View File

@ -26,6 +26,7 @@ TurboPFor: Fastest Integer Compression [![Build Status](https://travis-ci.org/po
<p>
+ **Variable byte**
- :sparkles: Scalar **"Variable Byte"** faster and more efficient than **ANY** other (incl. SIMD MaskedVByte) implementation
- :new: **now up to 25% faster**
<p>
+ **Simple family**
- :sparkles: **Novel** **"Variable Simple"** (incl. **RLE**) faster and more efficient than simple16, simple-8b
@ -71,7 +72,7 @@ CPU: Sandy bridge i7-2600k at 4.2GHz, gcc 5.1, ubuntu 15.04, single thread.
| 99.910.930| 24.98| 7.99| 2524.50|1943.41|[SIMDPack FPF](#FastPFor)|
| 99.910.930| 24.98| 7.99| 1883.21|1898.11|**TurboPack**|
| 99.910.930| 24.98| 7.99| 1877.25| 935.83|**TurboForDA**|
|102.074.663| 25.52| 8.17| 1621.64|1694.64|**TurboVbyte**|
|102.074.663| 25.52| 8.17| 1993.95|1827.04|**TurboVbyte**|
|102.074.663| 25.52|8.17|1214.12|1688.95|[MaskedVByte](#MaskedVByte)|
|102.074.663| 25.52| 8.17| 1178.72| 949.59|[Vbyte FPF](#FastPFor)|
|103.035.930| 25.76| 8.24| 1480.47|1746.51|[libfor](#libfor)|
@ -90,16 +91,16 @@ CPU: Skylake i7-6700 w/ only 3.7GHz
| 63392801| 15.85| 5.07| 387.30| 243.62|**TurboPForDA**|
| 65359916| 16.34| 5.23| 7.58| 609.12|OptPFD|
| 73477088| 18.37| 5.88| 101.68| 621.37|Simple16|
| 78514276| 19.63| 6.28|256.83|676.45|**VSimple**|
| 95915096| 23.98| 7.67| 211.79|954.62|Simple-8b|
| 78514276| 19.63| 6.28|258.31|691.48|**VSimple**|
| 95915096| 23.98| 7.67| 211.79|957.62|Simple-8b|
| 98546814| 24.64| 7.88| 70.85|**2349.71**|[QMX](#QMX)|
| 99910930| 24.98| 7.99|**3537.57**|**3081.79**|**TurboPackV**|
| 99910930| 24.98| 7.99| 3099.52|3071.77|SIMDPack FPF|
| 99910930| 24.98| 7.99| 2050.47|2402.54|**TurboPack**|
| 99910930| 24.98| 7.99| 2095.79|2495.22|**TurboPack**|
| 99910930| 24.98| 7.99| 2049.85|2364.52|**TurboFor**|
| 99910930| 24.98| 7.99| 2049.70|1124.12|**TurboForDA**|
|102074663| 25.52| 8.17| 1354.42|1745.69|MaskedVByte|
|102074663| 25.52| 8.17| 1660.76|1626.67|**TurboVbyte**|
|102074663| 25.52| 8.17| 1825.64|1844.34|**TurboVbyte**|
|102074663| 25.52| 8.17| 1249.77|1051.85|Vbyte FPF|
|112500000| 28.12| 9.00| 466.94|3003.70|VarintG8IU|
|128125000| 32.03| 10.25| 1109.67|1271.32|[StreamVbyte FPF](#FastPFor)|
@ -310,4 +311,4 @@ header files to use with documentation:<br />
- [On Inverted Index Compression for Search Engine Efficiency](http://www.dcs.gla.ac.uk/~craigm/publications/catena14compression.pdf)
- [Google's Group Varint Encoding](http://static.googleusercontent.com/media/research.google.com/de//people/jeff/WSDM09-keynote.pdf)
Last update: 27 MAR 2016
Last update: 08 APR 2016

View File

@ -35,17 +35,17 @@
#define DSTI(__op)
#define BPI(__w, __x, __parm) __w
#include __FILE__
unsigned char *bitunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return ip; }
unsigned char *bitunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return ip; }
unsigned char *bitunpack64( const unsigned char *__restrict in, unsigned n, uint64_t *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK64(in, n, b, out, 0); return ip; }
unsigned char *bitunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return (unsigned char *)ip; }
unsigned char *bitunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, 0); return (unsigned char *)ip; }
unsigned char *bitunpack64( const unsigned char *__restrict in, unsigned n, uint64_t *__restrict out , unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK64(in, n, b, out, 0); return (unsigned char *)ip; }
#undef BPI
#undef DSTI
//-----------------------------------------------------------------------------------------------------------------
#define DSTI(__op)
#define BPI(__w, __x, __parm) (__parm += (__w) + 1)
#include __FILE__
unsigned char *bitd1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitd1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitd1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
unsigned char *bitd1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
#undef BPI
#undef DSTI
@ -53,8 +53,8 @@ unsigned char *bitd1unpack16(const unsigned char *__restrict in, unsigned n, uns
#define DSTI(__op)
#define BPI(__w, __x, __parm) (__parm += (__w))
#include __FILE__
unsigned char *bitdunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitdunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitdunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
unsigned char *bitdunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
#undef BPI
#undef DSTI
@ -63,7 +63,7 @@ unsigned char *bitdunpack16( const unsigned char *__restrict in, unsigned n, uns
#define DSTI(__op)
#define BPI(__w, __x, __parm) (__parm += zigzagdec32(__w))
#include __FILE__
unsigned char *bitzunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitzunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
//unsigned char *bitzunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
#undef BPI
#undef DSTI
@ -73,8 +73,8 @@ unsigned char *bitzunpack32( const unsigned char *__restrict in, unsigned n, uns
#define BPI(__w, __x, __parm) (__parm + (__w))
#include __FILE__
unsigned char *bitfunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitfunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitfunpack32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
unsigned char *bitfunpack16( const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
#undef BPI
#undef DSTI
@ -82,8 +82,8 @@ unsigned char *bitfunpack16( const unsigned char *__restrict in, unsigned n, uns
#define DSTI(__op) start += 32
#define BPI(__w, __x, __parm) (__parm + (__w)+__x+1)
#include __FILE__
unsigned char *bitf1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitf1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return ip; }
unsigned char *bitf1unpack32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
unsigned char *bitf1unpack16(const unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned start, unsigned b) { const unsigned char *ip = in+PAD8(n*b); BITUNPACK32(in, n, b, out, start); return (unsigned char *)ip; }
#undef BPI
#undef DSTI

View File

@ -78,7 +78,7 @@ unsigned char *bitunpackv32( const unsigned char *__restrict in, unsigned n, uns
const unsigned char *ip = in+PAD8(n*b);
__m128i sv;
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef VSTO0
@ -116,7 +116,7 @@ unsigned char *_bitunpackv32( const unsigned char *__restrict in, unsigned n, un
const unsigned char *ip = in+PAD8(n*b); unsigned m;
__m128i sv;
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef VSTO0
@ -134,7 +134,7 @@ unsigned char *bitzunpackv32( const unsigned char *__restrict in, unsigned n, un
const unsigned char *ip = in+PAD8(n*b);
__m128i sv = _mm_set1_epi32(start);
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef BITUNPACK0
@ -149,7 +149,7 @@ unsigned char *bitdunpackv32( const unsigned char *__restrict in, unsigned n, un
const unsigned char *ip = in+PAD8(n*b);
__m128i sv = _mm_set1_epi32(start);
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef VSTO0
@ -171,7 +171,7 @@ unsigned char *_bitdunpackv32( const unsigned char *__restrict in, unsigned n, u
const unsigned char *ip = in+PAD8(n*b); unsigned m;
__m128i sv = _mm_set1_epi32(start);
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef VSTO0
@ -188,7 +188,7 @@ unsigned char *bitd1unpackv32( const unsigned char *__restrict in, unsigned n, u
const unsigned char *ip = in+PAD8(n*b);
__m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(4,3,2,1);
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef VSTO0
@ -209,7 +209,7 @@ unsigned char *_bitd1unpackv32( const unsigned char *__restrict in, unsigned n,
const unsigned char *ip = in+PAD8(n*b); unsigned m;
__m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(4,3,2,1);
BITUNPACKV32(in, n, b, out, sv);
return ip;
return (unsigned char *)ip;
}
#undef VSTO
#undef VSTO0

136
bitutil.c
View File

@ -1,5 +1,5 @@
/**
Copyright (C) powturbo 2013-2015
Copyright (C) powturbo 2013-2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
@ -33,19 +33,22 @@
_x = (*_p)-__start-__inc; __start = *_p++; __act;\
_x = (*_p)-__start-__inc; __start = *_p++; __act;\
}\
while(_p < __p+__n) { \
while(_p != __p+__n) { \
_x = *_p-__start-__inc; __start = *_p++; __act;\
}\
}
#define BITUNDELTA(__p, __n, __start, __inc) { typeof(__p[0]) *_p;\
#define BITUNDELTA(__p, __n, __start, __inc) {\
typeof(__p[0]) *_p;\
for(_p = __p; _p != __p+(__n&~(4-1)); ) {\
*_p = (__start += (*_p) + __inc); _p++;\
*_p = (__start += (*_p) + __inc); _p++;\
*_p = (__start += (*_p) + __inc); _p++;\
*_p = (__start += (*_p) + __inc); _p++;\
}\
while(_p < __p+__n) { *_p = (__start += (*_p) + __inc); _p++; }\
while(_p != __p+__n) {\
*_p = (__start += (*_p) + __inc); _p++;\
}\
}
#define BITMINMAX(__p,__n, __mi, __mx) {\
@ -56,7 +59,7 @@
if(*_p < __mi) __mi = *_p; if(*_p > __mx) __mx = *_p; _p++; \
if(*_p < __mi) __mi = *_p; if(*_p > __mx) __mx = *_p; _p++; \
}\
while(_p < __p+__n) { \
while(_p != __p+__n) { \
if(*_p < __mi) __mi = *_p; if(*_p > __mx) __mx = *_p; _p++; \
}\
}
@ -65,29 +68,36 @@ unsigned bitdelta32(unsigned *in, unsigned n, unsigned *out, unsigned start, uns
#ifdef __SSE2__
unsigned *ip,b,*op = out;
__m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(inc), dv;
for(ip = in; ip != in+(n&~(4-1)); ip += 4) {
for(ip = in; ip != in+(n&~(4-1)); ip += 4,op += 4) {
__m128i iv = _mm_loadu_si128((__m128i *)ip);
bv = _mm_or_si128(bv, dv = _mm_sub_epi32(DELTA128_32(iv,sv),cv));
sv = iv;
_mm_storeu_si128((__m128i *)op, dv);
op += 4;
}
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
HOR128_32(bv, b);
while(ip < in+n) { unsigned x = *ip-start-inc; start = *ip++; b |= x; *op++ = x; }
while(ip != in+n) {
unsigned x = *ip-start-inc;
start = *ip++;
b |= x;
*op++ = x;
}
#else
typeof(in[0]) b = 0,*op = out; BITDELTA(in, n, inc, start, b |= _x;*op++ = _x);
typeof(in[0]) b = 0,*op = out;
BITDELTA(in, n, inc, start, b |= _x;*op++ = _x);
#endif
return bsr32(b);
}
unsigned bitdelta64(uint64_t *in, unsigned n, uint64_t *out, uint64_t start, unsigned inc) {
typeof(in[0]) b = 0,*op = out; BITDELTA(in, n, inc, start, b |= _x; *op++ = _x);
typeof(in[0]) b = 0,*op = out;
BITDELTA(in, n, inc, start, b |= _x; *op++ = _x);
return bsr64(b);
}
unsigned bit32(unsigned *in, unsigned n) {
typeof(in[0]) b; BITSIZE32(in, n, b);
typeof(in[0]) b;
BITSIZE32(in, n, b);
return b;
}
@ -119,13 +129,14 @@ unsigned bitd32(unsigned *in, unsigned n, unsigned start) {
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
HOR128_32(bv, b);
while(ip < in+n) {
while(ip != in+n) {
unsigned x = *ip-start;
start = *ip++;
b |= x;
}
#else
typeof(in[0]) b = 0; BITDELTA(in,n, 0, start, b |= _x);
typeof(in[0]) b = 0;
BITDELTA(in,n, 0, start, b |= _x);
#endif
return bsr32(b);
}
@ -141,13 +152,14 @@ unsigned bitd132(unsigned *in, unsigned n, unsigned start) {
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
HOR128_32(bv, b);
while(ip < in+n) {
while(ip != in+n) {
unsigned x = *ip-start-1;
start = *ip++;
b |= x;
}
#else
typeof(in[0]) b = 0; BITDELTA(in, n, 1, start, b |= _x);
typeof(in[0]) b = 0;
BITDELTA(in, n, 1, start, b |= _x);
#endif
return bsr32(b);
}
@ -159,14 +171,13 @@ void bitund132(unsigned *p, unsigned n, unsigned x) {
#ifdef __SSE2__
__m128i sv = _mm_set1_epi32(x), cv = _mm_set_epi32(4,3,2,1);
unsigned *ip;
for(ip = p; ip != p+(n&~(4-1)); ) {
for(ip = p; ip != p+(n&~(4-1)); ip += 4) {
__m128i v = _mm_loadu_si128((__m128i *)ip);
SCANI128_32(v, sv, cv);
_mm_storeu_si128((__m128i *)ip, sv);
ip += 4;
}
x = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
while(ip < p+n) {
while(ip != p+n) {
*ip = (x += (*ip) + 1);
ip++;
}
@ -188,18 +199,21 @@ void bitundx64(uint64_t *p, unsigned n, uint64_t x, unsigned inc) { BITUNDELTA(p
_x = ((int)(*_p)-(int)__start); _x = (_x << 1) ^ (_x >> (sizeof(_x)*8-1)); __start = *_p++; __act;\
}\
while(_p != __p+__n) { \
_x = ((int)(*_p)-(int)__start); _x = (_x << 1) ^ (_x >> (sizeof(_x)*8-1)); __start = *_p++; __act;\
_x = ((int)(*_p)-(int)__start); _x = (_x << 1) ^ (_x >> (sizeof(_x)*8-1)); __start = *_p++; __act;\
}\
}
#define BITUNZIGZAG(__p, __n, __start) { typeof(__p[0]) *_p, _z;\
#define BITUNZIGZAG(__p, __n, __start) {\
typeof(__p[0]) *_p, _z;\
for(_p = __p; _p != __p+(__n&~(4-1)); ) {\
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
}\
while(_p != __p+__n) { _z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++; }\
while(_p != __p+__n) {\
_z = *_p; *_p = (__start += (_z >> 1 ^ -(_z & 1))); _p++;\
}\
}
unsigned bitz32(unsigned *in, unsigned n, unsigned start) {
@ -216,10 +230,15 @@ unsigned bitz32(unsigned *in, unsigned n, unsigned start) {
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
HOR128_32(bv, b);
while(ip != in+n) {
int x = ((int)(*ip)-(int)start); x = (x << 1) ^ (x >> 31); start = *ip++; b |= x;
int x = ((int)(*ip)-(int)start);
x = (x << 1) ^ (x >> 31);
start = *ip++;
b |= x;
}
#else
typeof(in[0]) b = 0,*op = out; int _x; BITZIGZAG(in, n, start, b |= (unsigned)_x);
typeof(in[0]) b = 0,*op = out;
int _x;
BITZIGZAG(in, n, start, b |= (unsigned)_x);
#endif
return bsr32(b);
}
@ -228,22 +247,27 @@ unsigned bitzigzag32(unsigned *in, unsigned n, unsigned *out, unsigned start) {
#ifdef __SSE2__
unsigned *ip,b,*op = out;
__m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start), dv;
for(ip = in; ip != in+(n&~(4-1)); ip += 4) {
for(ip = in; ip != in+(n&~(4-1)); ip += 4,op += 4) {
__m128i iv = _mm_loadu_si128((__m128i *)ip);
dv = DELTA128_32(iv,sv);
sv = iv;
dv = ZIGZAG128_32(dv);
bv = _mm_or_si128(bv, dv);
_mm_storeu_si128((__m128i *)op, dv);
op += 4;
}
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
HOR128_32(bv, b);
while(ip != in+n) {
int x = ((int)(*ip)-(int)start); x = (x << 1) ^ (x >> 31); start = *ip++; b |= x; *op++ = x;
int x = ((int)(*ip)-(int)start);
x = (x << 1) ^ (x >> 31);
start = *ip++;
b |= x;
*op++ = x;
}
#else
typeof(in[0]) b = 0,*op = out; int _x; BITZIGZAG(in, n, start, b |= (unsigned)_x; *op++ = _x);
typeof(in[0]) b = 0, *op = out;
int _x;
BITZIGZAG(in, n, start, b |= (unsigned)_x; *op++ = _x);
#endif
return bsr32(b);
}
@ -252,61 +276,81 @@ void bitunzigzag32(unsigned *p, unsigned n, unsigned start) {
#ifdef __SSE2__
__m128i sv = _mm_set1_epi32(start); //, c1 = _mm_set1_epi32(1), cz = _mm_setzero_si128();
unsigned *ip;
for(ip = p; ip != p+(n&~(4-1)); ) {
for(ip = p; ip != p+(n&~(4-1)); ip += 4) {
__m128i iv = _mm_loadu_si128((__m128i *)ip);
iv = UNZIGZAG128_32(iv);
SCAN128_32(iv, sv);
_mm_storeu_si128((__m128i *)ip, sv);
ip += 4;
}
start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12));
while(ip != p+n) {
unsigned z = *ip; *ip = (start += (z >> 1 ^ -(z & 1))); ip++;
unsigned z = *ip;
*ip++ = (start += (z >> 1 ^ -(z & 1)));
}
#else
BITUNZIGZAG(p, n, start);
#endif
}
unsigned bitzigzag64(unsigned *in, unsigned n, unsigned *out, unsigned start) {
typeof(in[0]) b = 0,*op = out; long long _x; BITZIGZAG(in, n, start, b |= (unsigned long long)_x; *op++ = _x);
// Zigzag-delta transform of a 64-bit array. BITZIGZAG stores each zigzag-encoded
// difference to out[] and ORs it into b; the return value is the bit length of that
// OR, i.e. the width needed to hold every encoded value.
unsigned bitzigzag64(uint64_t *in, unsigned n, uint64_t *out, unsigned start) {
  typeof(in[0]) b = 0, *op = out;
  long long _x;                 // scratch variable assigned inside BITZIGZAG (name is fixed by the macro)
  BITZIGZAG(in, n, start, b |= (unsigned long long)_x; *op++ = _x);
  return bsr64(b);              // b is 64 bits wide: bsr32() takes an int and would drop the upper bits
}
void bitunzigzag64(unsigned *p, unsigned n, unsigned start) {
// In-place inverse of the zigzag-delta transform for a 64-bit array (see BITUNZIGZAG):
// each element is zigzag-decoded and added to the running value seeded with start.
// NOTE(review): start is a 32-bit unsigned, so the running sum inside BITUNZIGZAG is
// accumulated in 32 bits before being stored into the 64-bit element - confirm intended.
void bitunzigzag64(uint64_t *p, unsigned n, unsigned start) {
BITUNZIGZAG(p, n, start);
}
//------------------- De-/Compose Floating Point -----------------------------------------
void bitdouble(double *in, unsigned n, unsigned *sgn, unsigned *expo, uint64_t *mant) {
void bitdouble(double *in, unsigned n, int *expo, uint64_t *mant) {
double *ip;
uint64_t u;
for(ip = in; ip < in+n; ip++) {
u = *(uint64_t *)ip; BITFLOAT(u, *sgn++, *expo++, *mant++, DMANT_BITS, 1ull);
uint64_t u = *(uint64_t *)ip;
*expo++ = FLTEXPO(u, DMANT_BITS, 1ull);
*mant++ = FLTMANT(u, DMANT_BITS, 1ull);
}
}
void bitundouble(unsigned *sgn, unsigned *expo, uint64_t *mant, unsigned n, double *out) {
void bitundouble(int *expo, uint64_t *mant, unsigned n, double *out) {
double *op;
uint64_t u;
for(op = out; op < out+n; op++) {
BITUNFLOAT((uint64_t)(*sgn++), (uint64_t)(*expo++), *mant++, u, DMANT_BITS); *op = *(double *)&u;
for(op = out; op < out+n; ) {
BITUNFLOAT( (int64_t)(*expo++), *mant++, u, DMANT_BITS); *op++ = *(double *)&u;
}
}
void bitfloat(float *in, unsigned n, unsigned *sgn, unsigned *expo, unsigned *mant) {
float *ip;
unsigned u;
void bitzdouble(double *in, unsigned n, int *expo, uint64_t *mant) {
double *ip;
for(ip = in; ip < in+n; ip++) {
u = *(unsigned *)ip; BITFLOAT(u, *sgn++, *expo++, *mant++, FMANT_BITS, 1u);
uint64_t u = *(uint64_t *)ip;
*expo++ = zigzagenc32((int)FLTEXPO(u, DZMANT_BITS, 1ull)-1023);
*mant++ = FLTMANT(u, DZMANT_BITS, 1ull);
}
}
void bitunfloat(unsigned *sgn, unsigned *expo, unsigned *mant, unsigned n, float *out) {
// Inverse of bitzdouble(): rebuilds n doubles from the split integer arrays.
// For each i the exponent word is zigzag-decoded, re-biased by +1023, shifted left by
// DZMANT_BITS and OR-ed with mant[i] (BITUNFLOAT); the resulting 64-bit pattern is
// stored as out[i].
void bitzundouble(int *expo, uint64_t *mant, unsigned n, double *out) {
  // Type-pun through a union: reading a uint64_t object through a double lvalue
  // (*(double *)&u) violates the strict-aliasing rule.
  union { uint64_t u; double d; } pun;
  unsigned i;
  for(i = 0; i < n; i++) {
    BITUNFLOAT( (int64_t)zigzagdec32(expo[i])+1023, mant[i], pun.u, DZMANT_BITS);
    out[i] = pun.d;
  }
}
// Split n floats into two integer arrays. For the 32-bit pattern of each value,
// expo[i] receives the bits above the low FMANT_BITS bits (FLTEXPO) and mant[i]
// the low FMANT_BITS bits (FLTMANT).
void bitfloat(float *in, unsigned n, int *expo, unsigned *mant) {
  unsigned i;
  for(i = 0; i < n; i++) {
    unsigned bits = *(unsigned *)&in[i];
    expo[i] = FLTEXPO(bits, FMANT_BITS, 1u);
    mant[i] = FLTMANT(bits, FMANT_BITS, 1u);
  }
}
void bitunfloat(int *expo, unsigned *mant, unsigned n, float *out) {
float *op;
unsigned u;
for(op = out; op < out+n; op++) {
BITUNFLOAT((*sgn++), (*expo++), *mant++, u, FMANT_BITS); *op = *(float *)&u;
BITUNFLOAT( (*expo++), *mant++, u, FMANT_BITS); *op = *(float *)&u;
}
}

149
bitutil.h
View File

@ -1,5 +1,5 @@
/**
Copyright (C) powturbo 2013-2015
Copyright (C) powturbo 2013-2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
@ -24,89 +24,103 @@
// bitutil.h - "Integer Compression"
#include <stdint.h>
#define _BITFORZERO(out, n, start, inc) do {\
for(i = 0; i != (n&~3); ) {\
out[i] = start+i*inc; i++;\
out[i] = start+i*inc; i++;\
out[i] = start+i*inc; i++;\
out[i] = start+i*inc; i++;\
}\
while(i < n) out[i] = start+i*inc,++i;\
// Scalar fill: _out_[i] = _start_ + i*_inc_ for i in [0, _n_), unrolled by four.
// Every argument is parenthesised so expression arguments (e.g. an _inc_ of "a+1")
// expand correctly. Arguments are evaluated more than once, so they must be free of
// side effects.
#define _BITFORZERO(_out_, _n_, _start_, _inc_) do { unsigned _i;\
  for(_i = 0; _i != ((_n_)&~3); ) {\
    (_out_)[_i] = (_start_)+_i*(_inc_); _i++;\
    (_out_)[_i] = (_start_)+_i*(_inc_); _i++;\
    (_out_)[_i] = (_start_)+_i*(_inc_); _i++;\
    (_out_)[_i] = (_start_)+_i*(_inc_); _i++;\
  }\
  while(_i != (_n_))\
    (_out_)[_i] = (_start_)+_i*(_inc_), ++_i;\
} while(0)
#define BITSIZE(__in, __n, __b, __usize) { typeof(__in[0]) *_ip;\
for(__b=0,_ip = __in; _ip != __in+(__n&~(4-1)); )\
__b |= *_ip++ | *_ip++ | *_ip++ | *_ip++;\
while(_ip != __in+__n) __b |= *_ip++;\
__b = TEMPLATE(bsr, __usize)(__b);\
// Store in _b_ the result of bsr<_usize_>() applied to the OR of the _n_ elements of _in_.
// TEMPLATE(bsr, _usize_) presumably resolves to bsr32/bsr64 - confirm against its definition.
// The unrolled step advances _ip once per iteration: several _ip++ inside one expression
// are unsequenced modifications of _ip (undefined behaviour).
// Arguments are evaluated more than once and must be free of side effects.
#define BITSIZE(_in_, _n_, _b_, _usize_) { typeof((_in_)[0]) *_ip;\
  for(_b_ = 0,_ip = (_in_); _ip != (_in_)+((_n_)&~(4-1)); _ip += 4)\
    _b_ |= _ip[0] | _ip[1] | _ip[2] | _ip[3];\
  while(_ip != (_in_)+(_n_))\
    _b_ |= *_ip++;\
  _b_ = TEMPLATE(bsr, _usize_)(_b_);\
}
static inline unsigned zigzagenc32(int x) { return x << 1 ^ x >> 31; }
static inline unsigned zigzagdec32(unsigned x) { return x >> 1 ^ -(x & 1); }
static inline unsigned zigzagenc31(int x) { x = (x << 2 | ((x>>30)& 2)) ^ x >> 31; return x; }
static inline unsigned zigzagdec31(unsigned x) { return (x >> 2 | (x& 2)<<30 ) ^ -(x & 1); }
static inline unsigned zigzagenc32(int x) { return x << 1 ^ x >> 31; }
static inline unsigned zigzagdec32(unsigned x) { return x >> 1 ^ -(x & 1); }
static inline uint64_t zigzagenc64(int64_t x) { return x << 1 ^ x >> 63; }
static inline uint64_t zigzagdec64(uint64_t x) { return x >> 1 ^ -(x & 1); }
#ifdef __SSE2__
#include <emmintrin.h>
// SIMD Delta
#define DELTA128_32(_v_, _sv_) _mm_sub_epi32(_v_, _mm_or_si128(_mm_srli_si128(_sv_, 12), _mm_slli_si128(_v_, 4)))
#define DELTA128_32(__v, __sv) _mm_sub_epi32(__v, _mm_or_si128(_mm_srli_si128(__sv, 12), _mm_slli_si128(__v, 4)))
// SIMD Scan ( prefix sum )
#define SCAN128_32( _v_, _sv_) _v_ = _mm_add_epi32(_v_, _mm_slli_si128(_v_, 4)); _sv_ = _mm_add_epi32(_mm_shuffle_epi32(_sv_, _MM_SHUFFLE(3, 3, 3, 3)), _mm_add_epi32(_mm_slli_si128(_v_, 8), _v_) )
#define SCANI128_32(_v_, _sv_, _vi_) SCAN128_32(_v_, _sv_); _sv_ = _mm_add_epi32(_sv_, _vi_)
#define SCAN128_32( __v, __sv) __v = _mm_add_epi32(__v, _mm_slli_si128(__v, 4)); __sv = _mm_add_epi32(_mm_shuffle_epi32(__sv, _MM_SHUFFLE(3, 3, 3, 3)), _mm_add_epi32(_mm_slli_si128(__v, 8), __v) )
#define SCANI128_32(__v, __sv, __vi) SCAN128_32(__v, __sv); __sv = _mm_add_epi32(__sv, __vi)
// SIMD ZigZag
#define ZIGZAG128_32(_v_) _mm_xor_si128(_mm_slli_epi32(_v_,1), _mm_srai_epi32(_v_,31))
#define UNZIGZAG128_32(_v_) _mm_xor_si128(_mm_srli_epi32(_v_,1), _mm_srai_epi32(_mm_slli_epi32(_v_,31),31) ) //_mm_sub_epi32(cz, _mm_and_si128(iv,c1))
#define ZIGZAG128_32(__v) _mm_xor_si128(_mm_slli_epi32(__v,1), _mm_srai_epi32(__v,31))
#define UNZIGZAG128_32(__v) _mm_xor_si128(_mm_srli_epi32(__v,1), _mm_srai_epi32(_mm_slli_epi32(__v,31),31) ) //_mm_sub_epi32(cz, _mm_and_si128(iv,c1))
// SIMD Horizontal OR
#define HOR128_32(__v,__b) __v = _mm_or_si128(__v, _mm_srli_si128(__v, 8)); __v = _mm_or_si128(__v, _mm_srli_si128(__v, 4)); __b = (unsigned)_mm_cvtsi128_si32(__v)
#define HOR128_32(_v_,_b_) _v_ = _mm_or_si128(_v_, _mm_srli_si128(_v_, 8)); _v_ = _mm_or_si128(_v_, _mm_srli_si128(_v_, 4)); _b_ = (unsigned)_mm_cvtsi128_si32(_v_)
#define BITSIZE32(__in, __n, __b) { typeof(__in[0]) *_ip; __m128i v = _mm_setzero_si128();\
for(_ip = __in; _ip != __in+(__n&~(4-1)); _ip+=4) v = _mm_or_si128(v, _mm_loadu_si128((__m128i*)_ip));\
HOR128_32(v,__b);\
while(_ip != __in+__n) __b |= *_ip++;\
__b = bsr32(__b);\
#define BITSIZE32(_in_, _n_, _b_) { typeof(_in_[0]) *_ip; __m128i _v = _mm_setzero_si128();\
for(_ip = _in_; _ip != _in_+(_n_&~(4-1)); _ip+=4)\
_v = _mm_or_si128(_v, _mm_loadu_si128((__m128i*)_ip));\
HOR128_32(_v,_b_);\
while(_ip != _in_+_n_)\
_b_ |= *_ip++;\
_b_ = bsr32(_b_);\
}
#define BITZERO32(out, n, start) do {\
__m128i sv = _mm_set1_epi32(start), *ov = (__m128i *)(out), *ove = (__m128i *)(out + n);\
do { _mm_storeu_si128(ov++, sv); } while(ov < ove); \
// SIMD set value
#define BITZERO32(_out_, _n_, _start_) do {\
__m128i _sv_ = _mm_set1_epi32(_start_), *_ov = (__m128i *)(_out_), *_ove = (__m128i *)(_out_ + _n_);\
do _mm_storeu_si128(_ov++, _sv_); while(_ov < _ove); \
} while(0)
#define BITFORZERO32(out, n, start, inc) do {\
__m128i sv = _mm_set1_epi32(start), *ov=(__m128i *)(out), *ove = (__m128i *)(out + n), cv = _mm_set_epi32(3*inc,2*inc,1*inc,0); \
sv = _mm_add_epi32(sv, cv);\
cv = _mm_set1_epi32(4);\
do { _mm_storeu_si128(ov++, sv); sv = _mm_add_epi32(sv, cv); } while(ov < ove);\
// SSE2 fill: _out_[i] = _start_ + i*_inc_, written four 32-bit lanes at a time.
// The per-vector step is 4*_inc_ so the result matches the scalar _BITFORZERO fallback
// for every _inc_ (a constant step of 4 is only correct when _inc_ == 1).
// Whole vectors are always stored and at least one store is executed, so up to three
// elements past _n_ (four when _n_ is 0) are written.
#define BITFORZERO32(_out_, _n_, _start_, _inc_) do {\
  __m128i _sv = _mm_set1_epi32(_start_), *_ov = (__m128i *)(_out_), *_ove = (__m128i *)((_out_) + (_n_)), _cv = _mm_set_epi32(3*(_inc_),2*(_inc_),1*(_inc_),0);\
  _sv = _mm_add_epi32(_sv, _cv);\
  _cv = _mm_set1_epi32(4*(_inc_));\
  do { _mm_storeu_si128(_ov++, _sv); _sv = _mm_add_epi32(_sv, _cv); } while(_ov < _ove);\
} while(0)
#define BITDIZERO32(out, n, start, inc) do { __m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(3+inc,2+inc,1+inc,inc), *ov=(__m128i *)(out), *ove = (__m128i *)(out + n);\
sv = _mm_add_epi32(sv, cv); cv = _mm_set1_epi32(4*inc); do { _mm_storeu_si128(ov++, sv), sv = _mm_add_epi32(sv, cv); } while(ov < ove);\
#define BITDIZERO32(_out_, _n_, _start_, _inc_) do { __m128i _sv = _mm_set1_epi32(_start_), _cv = _mm_set_epi32(3+_inc_,2+_inc_,1+_inc_,_inc_), *_ov=(__m128i *)(_out_), *_ove = (__m128i *)(_out_ + _n_);\
_sv = _mm_add_epi32(_sv, _cv); _cv = _mm_set1_epi32(4*_inc_); do { _mm_storeu_si128(_ov++, _sv), _sv = _mm_add_epi32(_sv, _cv); } while(_ov < _ove);\
} while(0)
#else
#define BITSIZE32(__in, __n, __b) BITSIZE(__in, __n, __b, 32)
#define BITFORZERO32(out, n, start, inc) _BITFORZERO(out, n, start, inc)
#define BITZERO32(out, n, start) _BITFORZERO(out, n, start, 0)
#define BITSIZE32(_in_, _n_, _b_) BITSIZE(_in_, _n_, _b_, 32)
#define BITFORZERO32(_out_, _n_, _start_, _inc_) _BITFORZERO(_out_, _n_, _start_, _inc_)
#define BITZERO32(_out_, _n_, _start_) _BITFORZERO(_out_, _n_, _start_, 0)
#endif
#define DELTR( __in, __n, __mode, __out) { unsigned _v; for( __out[0]=__in[0],_v = 1; _v < __n; _v++) __out[_v] = (__in[_v] - __out[0]) - _v*__mode; }
#define DELTRB(__in, __n, __mode, __b, __out) { unsigned _v; for(__b=0,__out[0]=__in[0],_v = 1; _v < __n; _v++) __out[_v] = (__in[_v] - __out[0]) - _v*__mode, __b |= __out[_v]; __b = bsr32(__b); }
#define DELTR( _in_, _n_, _mode_, _out_) { unsigned _v; for( _out_[0]=_in_[0],_v = 1; _v < _n_; _v++) _out_[_v] = (_in_[_v] - _out_[0]) - _v*_mode_; }
#define DELTRB(_in_, _n_, _mode_, _b_, _out_) { unsigned _v; for(_b_=0,_out_[0]=_in_[0],_v = 1; _v < _n_; _v++) _out_[_v] = (_in_[_v] - _out_[0]) - _v*_mode_, _b_ |= _out_[_v]; _b_ = bsr32(_b_); }
#ifdef __cplusplus
extern "C" {
#endif
// get maximum bit length of the elements in the integer array
//------------- get maximum bit length of the elements in the integer array -----------------------
unsigned bit32( unsigned *in, unsigned n);
// transform sorted integer array to delta array. inc = increment
//------------- Delta for sorted integer array ----------------------------------------------------
//-- transform sorted integer array to delta array. inc = increment: out[i] = in[i] - in[i-1] - inc
unsigned bitdelta32(unsigned *in, unsigned n, unsigned *out, unsigned start, unsigned inc);
unsigned bitdelta64(uint64_t *in, unsigned n, uint64_t *out, uint64_t start, unsigned inc);
// get delta maximum bit length of the non decreasing integer array
//-- get delta maximum bit length of the non decreasing integer array. out[i] = in[i] - in[i-1]
unsigned bitd32( unsigned *in, unsigned n, unsigned start);
// get delta maximum bit length of the non strictly decreasing integer array
//-- get delta maximum bit length of the strictly increasing integer array. out[i] = in[i] - in[i-1] - 1
unsigned bitd132( unsigned *in, unsigned n, unsigned start);
//-- in-place reverse delta transform
void bitund32( unsigned *p, unsigned n, unsigned x);
void bitund64( uint64_t *p, unsigned n, uint64_t x);
@ -115,32 +129,47 @@ void bitundx64( uint64_t *p, unsigned n, uint64_t x, unsigned inc);
void bitund132( unsigned *p, unsigned n, unsigned x);
// for
//------------- FOR array bit length: out[i] = in[i] - start -------------------------------------
unsigned bitf32( unsigned *in, unsigned n, unsigned start); // sorted
unsigned bitf132( unsigned *in, unsigned n, unsigned start);
unsigned bitfm32( unsigned *in, unsigned n, unsigned *pmin); // unsorted
unsigned bitf1m32( unsigned *in, unsigned n, unsigned *pmin);
// zigzag encoding for unsorted integer lists
//------------- Zigzag encoding for unsorted integer lists: out[i] = in[i] - in[i-1] -------------
//-- get maximum zigzag bit length of the integer array
unsigned bitz32( unsigned *in, unsigned n, unsigned start);
//-- Zigzag transform
unsigned bitzigzag32(unsigned *in, unsigned n, unsigned *out, unsigned start);
unsigned bitzigzag64(unsigned *in, unsigned n, unsigned *out, unsigned start);
unsigned bitzigzag64(uint64_t *in, unsigned n, uint64_t *out, unsigned start);
//-- Zigzag reverse transform
void bitunzigzag32( unsigned *p, unsigned n, unsigned start);
void bitunzigzag64( unsigned *p, unsigned n, unsigned start);
void bitunzigzag64( uint64_t *p, unsigned n, unsigned start);
//---- Floating point to Integer de-/composition ---------------------------------
#define FMANT_BITS 16
#define DMANT_BITS 32
#define DZMANT_BITS 36
#define FMANT_BITS 23
#define DMANT_BITS 52
#define BITFLOAT(__u, __sgn, __expo, __mant, __mantbits, __one) __sgn = __u >> (sizeof(__u)*8-1); __expo = ((__u >> (__mantbits)) & ( (__one<<(sizeof(__u)*8 - 1 - __mantbits)) -1)); __mant = __u & ((__one<<__mantbits)-1);
#define BITUNFLOAT( __sgn, __expo, __mant, __u, __mantbits) __u = (__sgn) << (sizeof(__u)*8-1) | (__expo) << __mantbits | (__mant)
#define FLTEXPO(__u,__mantbits, __one) ( ((__u) >> __mantbits) & ( (__one<<(sizeof(__u)*8 - __mantbits)) - 1 ) )
#define FLTMANT(__u,__mantbits, __one) ((__u) & ((__one<<__mantbits)-1))
#define BITUNFLOAT(__expo, __mant, __u, __mantbits) __u = ((__expo) << __mantbits) | (__mant)//>>1 | (__mant)<<(sizeof(__u)*8 - 1)
/*#define BITFLOAT(__u, __sgn, __expo, __mant, __mantbits, __one) __sgn = __u >> (sizeof(__u)*8-1); __expo = EXPO(__u,__mantbits; __mant = __u & ((__one<<__mantbits)-1)
#define BITUNFLOAT( __sgn, __expo, __mant, __u, __mantbits) __u = (__sgn) << (sizeof(__u)*8-1) | (__expo) << __mantbits | (__mant) */
// De-/Compose floating point array to/from integer arrays (sign,exponent,mantissa) for using with "Integer Compression" functions ------------
void bitdouble( double *in, unsigned n, unsigned *sgn, unsigned *expo, uint64_t *mant);
void bitundouble( unsigned *sgn, unsigned *expo, uint64_t *mant, unsigned n, double *out);
void bitfloat( float *in, unsigned n, unsigned *sgn, unsigned *expo, unsigned *mant);
void bitunfloat( unsigned *sgn, unsigned *expo, unsigned *mant, unsigned n, float *out);
void bitdouble( double *in, unsigned n, int *expo, uint64_t *mant);
void bitundouble( int *expo, uint64_t *mant, unsigned n, double *out);
void bitzdouble( double *in, unsigned n, int *expo, uint64_t *mant);
void bitzundouble( int *expo, uint64_t *mant, unsigned n, double *out);
void bitfloat( float *in, unsigned n, int *expo, unsigned *mant);
void bitunfloat( int *expo, unsigned *mant, unsigned n, float *out);
#ifdef __cplusplus
}

20
conf.h
View File

@ -38,13 +38,19 @@
#define popcnt64(_x_) __builtin_popcountll(_x_)
#if defined(__i386__) || defined(__x86_64__)
static inline int __bsr32(int x) { asm("bsr %1,%0" : "=r" (x) : "rm" (x) ); return x; }
static inline int bsr32( int x) { int b = -1; asm("bsrl %1,%0" : "+r" (b) : "rm" (x) ); return b + 1; }
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
#define bsr16(_x_) bsr32(_x_)
static inline int __bsr32( int x) { asm("bsr %1,%0" : "=r" (x) : "rm" (x) ); return x; }
static inline int bsr32( int x) { int b = -1; asm("bsrl %1,%0" : "+r" (b) : "rm" (x) ); return b + 1; }
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
#define bsr16(_x_) bsr32(_x_)
static inline unsigned rol32(unsigned x, int s) { asm ("roll %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
static inline unsigned ror32(unsigned x, int s) { asm ("rorl %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
#else
static inline int bsr32(int x ) { return x?32 - __builtin_clz( x):0; }
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
static inline int bsr32(int x ) { return x?32 - __builtin_clz( x):0; }
static inline int bsr64(unsigned long long x) { return x?64 - __builtin_clzll(x):0; }
// Rotate the 32-bit value x left by s bits. The count is reduced modulo 32 so that
// s == 0 (or a multiple of 32) never produces a shift by the full width, which is undefined in C.
static inline unsigned rol32(unsigned x, int s) { unsigned r = (unsigned)s & 31; return x << r | x >> ((32 - r) & 31); }
// Rotate the 32-bit value x right by s bits. The count is reduced modulo 32 so that
// s == 0 (or a multiple of 32) never produces a shift by the full width, which is undefined in C.
static inline unsigned ror32(unsigned x, int s) { unsigned r = (unsigned)s & 31; return x >> r | x << ((32 - r) & 31); }
#endif
#define ctz64(_x_) __builtin_ctzll(_x_)
@ -65,6 +71,8 @@ static inline int bsr64(unsigned long long x) { unsigned long z = 0; _BitScanFor
static inline int ctz64(unsigned long long x) { unsigned long z = 0; _BitScanForward64(&z, x); return z; }
#endif
// Count trailing zero bits of x via the MSVC intrinsic. _BitScanForward writes the bit index
// through an `unsigned long *`, so the index variable uses that type (as ctz64 does).
// z stays 0 when x == 0 because the intrinsic finds no set bit.
static inline int ctz32(unsigned x) { unsigned long z = 0; _BitScanForward(&z, x); return (int)z; }
#define rol32(x,s) _lrotl(x, s)
#define ror32(x,s) _lrotr(x, s)
#define fseeko _fseeki64
#define ftello _ftelli64
// POSIX sleep() takes seconds, Win32 Sleep() takes milliseconds: scale up, not down.
#define sleep(x) Sleep((x)*1000)

View File

@ -1,5 +1,5 @@
/**
Copyright (C) powturbo 2013-2015
Copyright (C) powturbo 2013-2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
@ -1080,8 +1080,8 @@ int main(int argc, char *argv[]) { int r;
uint64_t *mantissa = malloc(n*sizeof(mantissa[0]));
unsigned *sign = malloc(n*sizeof(sign[0]));
unsigned *exp = malloc(n*sizeof(exp[0])); if(!mantissa || !exp || !sign || !dcpy) die("alloc error\n");
bitdouble( din, n, sign, exp, mantissa);
bitundouble( sign, exp, mantissa, n, dcpy);
bitdouble( din, n, exp, mantissa);
bitundouble( exp, mantissa, n, dcpy);
int i; for(i=0;i < n; i++) { printf("%d,%d,%llu,%e,%e\n", sign[i], exp[i],(long long unsigned int)mantissa[i], din[i], dcpy[i]); if(din[i]!=dcpy[i]) die("check error at %d %e %e\n", i, din[i], dcpy[i]); }
free(din); free(mantissa); free(exp); free(sign); free(dcpy);
exit(0);

186
vint.c
View File

@ -1,5 +1,5 @@
/**
Copyright (C) powturbo 2013-2015
Copyright (C) powturbo 2013-2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
@ -22,67 +22,45 @@
- email : powturbo [_AT_] gmail [_DOT_] com
**/
// vint.c - "Integer Compression" variable byte
#include <stdio.h>
#include <stdio.h>
#include "conf.h"
#include "vint.h"
#include "bitutil.h"
#define _vbputu32(__op, __x, __act) {\
if(likely(__x < (1<< 7))) { *__op++ = __x << 1; __act;}\
else if(likely(__x < (1<<14))) { *(unsigned short *)__op = __x << 2 | 0x01; __op += 2; __act;}\
else if(likely(__x < (1<<21))) { *(unsigned *)__op = __x << 3 | 0x03; __op += 3; __act;}\
else if(likely(__x < (1<<28))) { *(unsigned *)__op = __x << 4 | 0x07; __op += 4; __act;}\
else { *(unsigned *)__op = __x << 4 | 0x0f; __op += 4; *__op++ = __x >> 28; __act;}\
}
#define _vbgetu32(__ip, __x, __act) do {\
if(!((__x = *__ip) & (1<<0))) { __ip++; __x >>= 1; __act;}\
else if(!(__x & (1<<1))) { __x = (*(unsigned short *)__ip) >> 2; __ip += 2; __act;}\
else if(!(__x & (1<<2))) { __x = (*(unsigned *)__ip & 0xffffffu) >> 3; __ip += 3; __act;}\
else if(!(__x & (1<<3))) { __x = (*(unsigned *)__ip) >> 4; __ip += 4; __act;}\
else { __x = (unsigned long long)(*(unsigned *)__ip) >> 4 | (unsigned long long)(__ip[4]) << 28; __ip += 5; __act;}\
} while(0)
#define vbputu32(__op, __x) { unsigned _x_ = __x; _vbputu32(__op, _x_, ;); }
//-------------------------------------- variable byte : 32 bits ----------------------------------------------------------------
#if defined(__AVX2__) && defined(__AVX2__VINT)
#include <immintrin.h>
#define M1 0xfeull //7
#define M2 0xfffcull //14
#define M3 0xfffff8ull //21
#define M4 0xfffffff0ull //28
#define M5 0xfffffffff0ull //36
//0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
unsigned long long mtab[] = { M1, M2, M1, M3, M1, M2, M1, M4, M1, M2, M1, M3, M1, M2, M1, M5 };
#endif
//------------------------------------------------------------------------------------------------------------------------
//0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111
unsigned char vtab[] = { 1, 2, 1, 3, 1, 2, 1, 4, 1, 2, 1, 3, 1, 2, 1, 5 };
// Encoded length in bytes, indexed by the high nibble of the first encoded byte (vbvlen32):
// 0xxx -> 1, 10xx -> 2, 110x -> 3, 1110 -> 4, 1111 -> 5
unsigned char vtab[] = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5 };
// Decode the variable byte buffer in into an array out of n unsigned values. Return value = end of the consumed (compressed) input buffer in
unsigned char *vbdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out) { unsigned x,*op;
for(op = out; op != out+(n&~(4-1)); op += 4) {
_vbgetu32(in, x, op[0] = x);
_vbgetu32(in, x, op[1] = x);
_vbgetu32(in, x, op[2] = x);
_vbgetu32(in, x, op[3] = x);
unsigned char *vbdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out) { register unsigned x, *op;
for(op = out; op != out+(n&~(8-1)); op += 8) {
_vbget32(in, x, op[0] = x);
_vbget32(in, x, op[1] = x);
_vbget32(in, x, op[2] = x);
_vbget32(in, x, op[3] = x); __builtin_prefetch(in+256, 0);
_vbget32(in, x, op[4] = x);
_vbget32(in, x, op[5] = x);
_vbget32(in, x, op[6] = x);
_vbget32(in, x, op[7] = x);
}
while(op != out+n) { _vbgetu32(in, x, ; ); *op++ = x; }
while(op != out+n) _vbget32(in, x, *op++ = x );
return in;
}
// encode array with n unsigned (32 bits in[n]) values to the buffer out. Return value = end of compressed buffer out
unsigned char *vbenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out) { unsigned *ip;
for(ip = in; ip != in+(n&~(4-1)); ) {
vbputu32(out, *ip++);
vbputu32(out, *ip++);
vbputu32(out, *ip++);
vbputu32(out, *ip++);
unsigned char *vbenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out) { register unsigned x, *ip;
for(ip = in; ip != in+(n&~(8-1)); ip += 8) { __builtin_prefetch(ip+128, 0);
x = ip[0]; _vbput32(out, x, ;);
x = ip[1]; _vbput32(out, x, ;);
x = ip[2]; _vbput32(out, x, ;);
x = ip[3]; _vbput32(out, x, ;);
x = ip[4]; _vbput32(out, x, ;);
x = ip[5]; _vbput32(out, x, ;);
x = ip[6]; _vbput32(out, x, ;);
x = ip[7]; _vbput32(out, x, ;);
}
while(ip != in+n) vbputu32(out, *ip++);
while(ip != in+n) { x = *ip++; _vbput32(out, x, ;); }
return out;
}
@ -113,28 +91,28 @@ unsigned char *vbenc64(uint64_t *__restrict in, unsigned n, unsigned char *__res
unsigned char *vbdenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start) {
unsigned *ip,v;
for(ip = in; ip != in+(n&~(4-1)); ) {
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbputu32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
v = (*ip)-start; start=*ip++; _vbput32(out, v, ;);
}
while(ip < in+n) { v = (*ip)-start; start = *ip++; _vbputu32(out, v, ;); }
while(ip < in+n) { v = (*ip)-start; start = *ip++; _vbput32(out, v, ;); }
return out;
}
unsigned char *vbddec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start) {
unsigned x,*op;
for(op = out; op != out+(n&~(8-1)); ) {
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbgetu32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
_vbget32(in, x, ;); *op++ = (start += x);
}
while(op != out+n) _vbgetu32(in, x, *op++ = (start += x));
while(op != out+n) _vbget32(in, x, *op++ = (start += x));
return in;
}
@ -147,21 +125,21 @@ unsigned char *vbd1enc32(unsigned *__restrict in, unsigned n, unsigned char *__r
v = in[0] - start - 1;
unsigned long long u = (unsigned long long)v<<1;
if(n == 1) u |= 1;
_vbputu32(op, u, ;);
_vbput32(op, u, ;);
if(!--n) return op;
start = *in++;
#endif
for(ip = in; ip != in + (n&~(4-1)); ) {
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v;
}
while(ip != in+n) { v = (*ip)-start-1; start = *ip++; _vbputu32(op, v, ;); b |= v; }
while(ip != in+n) { v = (*ip)-start-1; start = *ip++; _vbput32(op, v, ;); b |= v; }
#ifdef VINT_Z
if(!b) {
u = (unsigned long long)in[-1] << 1 | 1;
_vbputu32(out, u, ;);
_vbput32(out, u, ;);
return out;
}
#endif
@ -171,7 +149,7 @@ unsigned char *vbd1enc32(unsigned *__restrict in, unsigned n, unsigned char *__r
unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start) {
unsigned x,*op;
#ifdef VINT_Z
unsigned long long u; _vbgetu32(in, u, ;); x = u>>1; *out = (start += x+1);
unsigned long long u; _vbget32(in, u, ;); x = u>>1; *out = (start += x+1);
if(u & 1) {
#ifdef __SSE2__
out++; --n; BITDIZERO32(out, n, start, 1);
@ -184,16 +162,16 @@ unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned *__r
#endif
for(op = out; op != out+(n&~(8-1)); ) {
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbgetu32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
_vbget32(in, x, ++x); *op++ = (start += x);
}
while(op != out+n) { _vbgetu32(in, x, ++x); *op++ = (start += x); }
while(op != out+n) { _vbget32(in, x, ++x); *op++ = (start += x); }
return in;
}
@ -208,27 +186,51 @@ unsigned char *vbdec16(unsigned char *__restrict in, unsigned n, unsigned short
unsigned char *vbzenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start) {
unsigned *ip,v;
for(ip = in; ip != in+(n&~(4-1)); ) {
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbputu32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
v = zigzagenc32((*ip)-start); start=*ip++; _vbput32(out, v, ;);
}
while(ip < in+n) { v = zigzagenc32((*ip)-start); start = *ip++; _vbputu32(out, v, ;); }
while(ip < in+n) { v = zigzagenc32((*ip)-start); start = *ip++; _vbput32(out, v, ;); }
return out;
}
unsigned char *vbzdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start) {
unsigned x,*op;
for(op = out; op != out+(n&~(8-1)); ) {
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbgetu32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
_vbget32(in, x, ;); *op++ = (start += zigzagdec32(x));
}
while(op != out+n) _vbgetu32(in, x, *op++ = (start += zigzagdec32(x)));
while(op != out+n) _vbget32(in, x, *op++ = (start += zigzagdec32(x)));
return in;
}
// Variable byte encode the zigzag-coded differences between consecutive 64 bits values of in[n].
// start is the value the first difference is taken against. Return value = end of compressed buffer out
unsigned char *vbzenc64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start) {
  uint64_t *const end  = in + n;
  uint64_t *const end4 = in + (n&~(4-1));      // end of the part processed four values per iteration
  uint64_t *src = in, cur, z;
  while(src != end4) {
    cur = src[0]; z = zigzagenc64(cur - start); start = cur; _vbput64(out, z, ;);
    cur = src[1]; z = zigzagenc64(cur - start); start = cur; _vbput64(out, z, ;);
    cur = src[2]; z = zigzagenc64(cur - start); start = cur; _vbput64(out, z, ;);
    cur = src[3]; z = zigzagenc64(cur - start); start = cur; _vbput64(out, z, ;);
    src += 4;
  }
  for(; src < end; src++) {                    // remaining 0..3 values
    cur = *src; z = zigzagenc64(cur - start); start = cur; _vbput64(out, z, ;);
  }
  return out;
}
// Decode n variable byte values from in, undo the zigzag coding and accumulate them onto start,
// storing each running sum into out[]. Return value = end of the consumed buffer in
unsigned char *vbzdec64(unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start) {
  uint64_t *const end  = out + n;
  uint64_t *const end4 = out + (n&~(4-1));     // end of the part processed four values per iteration
  uint64_t *dst = out, z;
  while(dst != end4) {
    _vbget64(in, z, ;); start += zigzagdec64(z); dst[0] = start;
    _vbget64(in, z, ;); start += zigzagdec64(z); dst[1] = start;
    _vbget64(in, z, ;); start += zigzagdec64(z); dst[2] = start;
    _vbget64(in, z, ;); start += zigzagdec64(z); dst[3] = start;
    dst += 4;
  }
  while(dst != end) _vbget64(in, z, *dst++ = (start += zigzagdec64(z)));   // remaining 0..3 values
  return in;
}

48
vint.h
View File

@ -1,5 +1,5 @@
/**
Copyright (C) powturbo 2013-2015
Copyright (C) powturbo 2013-2016
GPL v2 License
This program is free software; you can redistribute it and/or modify
@ -31,39 +31,31 @@
extern "C" {
#endif
//--------- 32 bits ------------------
//--------------------------- 32 bits ---------------------------------------------------------------------------------------
extern unsigned char vtab[];
#define vbvlen32(__x) vtab[(__x)&0xf]
#define vbvlen32(__x) vtab[((unsigned char)(__x))>>4]
#define _vbput32(__op, __x, __act) {\
if(likely(__x < (1<< 7))) { *__op++ = __x << 1; __act;}\
else if(likely(__x < (1<<14))) { *(unsigned short *)__op = __x << 2 | 0x01; __op += 2; __act;}\
else if(likely(__x < (1<<21))) { *(unsigned short *)__op = __x << 3 | 0x03; __op += 2; *__op++ = __x >> 13; __act;}\
else if(likely(__x < (1<<28))) { *(unsigned *)__op = __x << 4 | 0x07; __op += 4; __act;}\
else { *(unsigned *)__op = __x << 4 | 0x0f; __op += 4; *__op++ = __x >> 28; __act;}\
if(likely(__x < (1<< 7))) { *__op++ = __x; __act;}\
else if(likely(__x < (1<<14))) { ctou16(__op) = __x << 8 | __x >> 8 | 0x80; __op += 2; __act;}\
else if(likely(__x < (1<<21))) { *__op++ = __x >> 16 | 0xc0; ctou32(__op) = __x; __op += 2; __act;}\
else if(likely(__x < (1<<28))) { ctou32(__op) = rol32(__x,8) | 0xe0; __op += 4; __act;}\
else { *__op++ = (unsigned long long)__x >> 32 | 0xf0; ctou32(__op) = __x; __op += 4; __act;}\
}
//#define __AVX2__VINT
#if defined(__AVX2__) && defined(__AVX2__VINT)
#include <immintrin.h>
extern unsigned long long mtab[];
#define _vbget32(__ip, __x, __act) do { unsigned _vdx=(*__ip)&0xf; __x = _pext_u64(*(unsigned long long *)__ip, mtab[_vdx]); __ip+=vtab[_vdx]; __act; } while(0)
#else
#define _vbget32(__ip, __x, __act) do {\
if(!((__x = *__ip) & (1<<0))) { __ip++; __x >>= 1; __act;}\
else if(!(__x & (1<<1))) { __x = (*(unsigned short *)__ip) >> 2; __ip += 2; __act;}\
else if(!(__x & (1<<2))) { __x = (*(unsigned short *)__ip) >> 3 | (unsigned)(*(__ip+2)) << 13; __ip += 3; __act;}\
else if(!(__x & (1<<3))) { __x = (*(unsigned *)__ip) >> 4; __ip += 4; __act;}\
else { __x = (unsigned long long)(*(unsigned *)__ip) >> 4 | (unsigned long long)(__ip[4]) << 28; __ip += 5; __act;}\
#define _vbget32(__ip, __x, __act) do { __x = *__ip++;\
if(!(__x & 0x80)) { __act;}\
else if(!(__x & 0x40)) { __x = (__x & 0x3f)<< 8 | *__ip++; __act;}\
else if(!(__x & 0x20)) { __x = (__x & 0x1f)<<16 | ctou16(__ip); __ip += 2; __act;}\
else if(!(__x & 0x10)) { __x = ror32(ctou32(__ip-1),8) & 0xfffffff; __ip += 3; __act;}\
else { __x = (unsigned long long)(__x & 0x07)<<32 | ctou32(__ip); __ip += 4; __act;}\
} while(0)
#endif
//----------------- 16 bits --------------------------
//----------------- 16 bits -------------------------------------------------------------------------------------------------------
// 16 bits values share the 32 bits encoder. _vbput32 takes a third "action" argument;
// pass an empty statement so that the two-argument form expands correctly.
#define _vbput16(__op, __x) _vbput32(__op, __x, ;)
#define _vbget16(__ip, __x, __act) _vbget32(__ip, __x, __act)
//----------------- 64 bits --------------------------
//----------------- 64 bits -------------------------------------------------------------------------------------------------------
#define _vbput64(__op, __x, __act) {\
if(__x < 1 << 7) { *__op++ = __x << 1; __act;}\
else if(__x < 1 <<14) { *(unsigned short *)__op = __x << 2 | 0x01; __op += 2; __act;}\
@ -96,8 +88,8 @@ extern unsigned long long mtab[];
#define vbput16(__op, __x) vbput32(__op, __x)
#define vbget16(__ip) vbget32(__ip)
#define vbput32(__op, __x) { unsigned _x_ = __x; _vbput32(__op, _x_, ;); }
#define vbget32(__ip) ({ unsigned _x_; _vbget32(__ip, _x_, ;); _x_; })
#define vbput32(__op, __x) { register unsigned _x_ = __x; _vbput32(__op, _x_, ;); }
#define vbget32(__ip) ({ register unsigned _x_; _vbget32(__ip, _x_, ;); _x_; })
#define vbput64(__op, __x) { unsigned long long _x_ = __x; _vbput64(__op, _x_, ;); }
#define vbget64(__ip) ({ unsigned long long _x_; _vbget64(__ip, _x_, ;); _x_; })
@ -122,6 +114,8 @@ unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned
//------ zigzag encoding integer lists -------------------------------------------------------------
unsigned char *vbzenc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start);
unsigned char *vbzdec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start);
// 64 bits variants: in[n]/out[n] hold uint64_t values and start is the value preceding the first element.
// vbzenc64 returns the end of the written buffer out, vbzdec64 the end of the consumed buffer in.
unsigned char *vbzenc64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start);
unsigned char *vbzdec64(unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
//--- 15 bits integer lists ------------
#define vbput15(__op, __x) do { unsigned _x = __x; if(likely(_x < 0x80)) *__op++ = _x; else { *__op++ = (_x) >> 8 | 0x80; *__op++ = _x; } } while(0)