TurboPFor: Hybrid Turbobyte+TurboPack
This commit is contained in:
76
v8pack.c
76
v8pack.c
@ -44,53 +44,55 @@
|
|||||||
#define PAD8(_x_) ( (((_x_)+8-1)/8) )
|
#define PAD8(_x_) ( (((_x_)+8-1)/8) )
|
||||||
// 0-0x1f: bitpacking, 0xff = memcpy, 0xfe:varint
|
// 0-0x1f: bitpacking, 0xff = memcpy, 0xfe:varint
|
||||||
|
|
||||||
#define _V8E(in, n, out, _csize_, _usize_, _bit_, _bitpackv_, _bitpack_) { if(!n) return 0;\
|
#define _V8E(in, _n_, out, _csize_, _usize_, _bit_, _bitpackv_, _bitpack_) { if(!_n_) return 0;\
|
||||||
unsigned char *op = out, *op_ = out+n*(_usize_/8);\
|
unsigned char *op = out, *op_ = out+_n_*(_usize_/8);\
|
||||||
for(ip = in; ip < in + n; ) { PREFETCH(ip+512,0);\
|
for(ip = in; ip < in + _n_; ) { PREFETCH(ip+512,0);\
|
||||||
unsigned _o,_b, iplen = (in+n) - ip; iplen = min(iplen,_csize_);\
|
unsigned _b, _bs, _o, iplen = (in+_n_) - ip; iplen = min(iplen,_csize_);\
|
||||||
_o = T2(_bit_, _usize_)(ip, iplen, &x); \
|
_o = T2(_bit_, _usize_)(ip, iplen, &x); _b = T2(bsr,_usize_)(_o); \
|
||||||
_b = T2(bsr,_usize_)(_o);\
|
_bs = (_b*iplen+7)/8+1;\
|
||||||
if(_b > 8+_usize_/16) {\
|
if(_b > 8+(_usize_/16)) { /*TurboByte < bitpacking?*/\
|
||||||
unsigned char *_sp = op; \
|
if(op+1+V8BOUND_(iplen, _usize_) < op_) { /*overflow?*/\
|
||||||
*op++ = 0xfe; op = T2(v8enc, _usize_)(ip, iplen, op);\
|
unsigned char *_sp = op;\
|
||||||
if(op - _sp < (_b*iplen+7)/8+1) { \
|
*op++ = 0xfe; \
|
||||||
if(op >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
|
op = T2(v8enc, _usize_)(ip, iplen, op); /*AS(op < op_, "#_V8DE overflow %u:%u,%u,%u\n", op - op_, 1+V8BOUND_(iplen, _usize_), op-_sp, v8len16(ip, iplen));*/\
|
||||||
goto a; \
|
if(op - _sp < _bs) goto a;\
|
||||||
|
op = _sp; /*restore*/\
|
||||||
}\
|
}\
|
||||||
op = _sp;\
|
|
||||||
}\
|
}\
|
||||||
if(op+(_b*iplen+7)/8+1 >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
|
if(op+_bs >= op_) { op = out; *op++ = 0xff; memcpy(op, in, _n_*(_usize_/8)); op += _n_*(_usize_/8); /*AS(op == op_+1, "#_V8DE overflow %u", op - op_);*/ goto e; }\
|
||||||
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, _b):\
|
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, _b):\
|
||||||
T2(_bitpack_, _usize_)(ip, iplen, op, _b);\
|
T2(_bitpack_, _usize_)(ip, iplen, op, _b);\
|
||||||
a:ip += iplen; \
|
a:ip += iplen; \
|
||||||
}\
|
} /*AS(op <= op_, "#_V8DE overflow %u", op - op_);*/\
|
||||||
return op - out;\
|
e: return op - out;\
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _V8DE(in, n, out, _csize_, _usize_, _v8enc_, _bitd_, _bitpackv_, _bitpack_,_delta_) {\
|
#define _V8DE(in, _n_, out, _csize_, _usize_, _v8enc_, _bitd_, _bitpackv_, _bitpack_,_delta_) {\
|
||||||
if(!n) return 0;\
|
if(!_n_) return 0;\
|
||||||
unsigned char *op = out, *op_ = out+n*(_usize_/8);\
|
unsigned char *op = out, *op_ = out+_n_*(_usize_/8);\
|
||||||
start = *in++;\
|
start = *in++;\
|
||||||
T2(vbput, _usize_)(op, start); \
|
T2(vbput, _usize_)(op, start);\
|
||||||
for(n--,ip = in; ip < in + n; ) { /*PREFETCH(ip+512,0);*/\
|
for(_n_--,ip = in; ip < in + _n_; ) { /*PREFETCH(ip+512,0);*/\
|
||||||
unsigned _b, _o, iplen = (in+n) - ip; iplen = min(iplen,_csize_);\
|
unsigned _b, _bs, _o, iplen = (in+_n_) - ip; iplen = min(iplen,_csize_);\
|
||||||
_o = T2(_bitd_, _usize_)(ip, iplen, &x, start); _b = T2(bsr,_usize_)(_o);\
|
_o = T2(_bitd_, _usize_)(ip, iplen, &x, start); _b = T2(bsr,_usize_)(_o); \
|
||||||
if(_b > 8+_usize_/16) { \
|
_bs = (_b*iplen+7)/8+1;\
|
||||||
unsigned char *_sp = op;\
|
if(_b > 8+(_usize_/16)) { /*TurboByte < bitpacking?*/\
|
||||||
*op++ = 0xfe; op = T2(_v8enc_, _usize_)(ip, iplen, op, start); \
|
if(op+1+V8BOUND_(iplen, _usize_) < op_) { /*overflow?*/\
|
||||||
if(op - _sp < (_b*iplen+7)/8+1) { \
|
unsigned char *_sp = op;\
|
||||||
if(op >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
|
*op++ = 0xfe; \
|
||||||
goto a; \
|
op = T2(_v8enc_, _usize_)(ip, iplen, op, start); /*AS(op < op_, "#_V8DE overflow %u:%u,%u,%u\n", op - op_, 1+V8BOUND_(iplen, _usize_), op-_sp, v8len16(ip, iplen));*/\
|
||||||
|
if(op - _sp < _bs) goto a;\
|
||||||
|
op = _sp; /*restore*/\
|
||||||
}\
|
}\
|
||||||
op = _sp;\
|
|
||||||
}\
|
}\
|
||||||
if(op+(_b*iplen+7)/8+1 >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
|
if(op+_bs >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (_n_+1)*(_usize_/8)); op += (_n_+1)*(_usize_/8); /*AS(op == op_+1, "#_V8DE overflow %u", op - op_);*/ goto e; }\
|
||||||
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b):\
|
if(*op++ = _b) { op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b):\
|
||||||
T2(_bitpack_, _usize_)(ip, iplen, op, start, _b);\
|
T2(_bitpack_, _usize_)(ip, iplen, op, start, _b);\
|
||||||
a:ip += iplen;\
|
}\
|
||||||
|
a: ip += iplen;\
|
||||||
start = ip[-1];\
|
start = ip[-1];\
|
||||||
}\
|
} /*AS(op <= op_, "#_V8DE overflow %u", op - op_);*/\
|
||||||
return op - out;\
|
e:return op - out;\
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t v8nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start,x; _V8E( in, n, out, 128, 16, bit, bitpack, bitpack); }
|
size_t v8nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start,x; _V8E( in, n, out, 128, 16, bit, bitpack, bitpack); }
|
||||||
|
Reference in New Issue
Block a user