TurboPFor: Hybrid Turbobyte+TurboPack

This commit is contained in:
x
2023-03-13 10:40:37 +01:00
parent b08263af09
commit d401d90d39

View File

@ -44,53 +44,55 @@
#define PAD8(_x_) ( (((_x_)+8-1)/8) )
// 0-0x1f: bitpacking, 0xff = memcpy, 0xfe:varint
#define _V8E(in, n, out, _csize_, _usize_, _bit_, _bitpackv_, _bitpack_) { if(!n) return 0;\
unsigned char *op = out, *op_ = out+n*(_usize_/8);\
for(ip = in; ip < in + n; ) { PREFETCH(ip+512,0);\
unsigned _o,_b, iplen = (in+n) - ip; iplen = min(iplen,_csize_);\
_o = T2(_bit_, _usize_)(ip, iplen, &x); \
_b = T2(bsr,_usize_)(_o);\
if(_b > 8+_usize_/16) {\
unsigned char *_sp = op; \
*op++ = 0xfe; op = T2(v8enc, _usize_)(ip, iplen, op);\
if(op - _sp < (_b*iplen+7)/8+1) { \
if(op >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
goto a; \
#define _V8E(in, _n_, out, _csize_, _usize_, _bit_, _bitpackv_, _bitpack_) { if(!_n_) return 0;\
unsigned char *op = out, *op_ = out+_n_*(_usize_/8);\
for(ip = in; ip < in + _n_; ) { PREFETCH(ip+512,0);\
unsigned _b, _bs, _o, iplen = (in+_n_) - ip; iplen = min(iplen,_csize_);\
_o = T2(_bit_, _usize_)(ip, iplen, &x); _b = T2(bsr,_usize_)(_o); \
_bs = (_b*iplen+7)/8+1;\
if(_b > 8+(_usize_/16)) { /*TurboByte < bitpacking?*/\
if(op+1+V8BOUND_(iplen, _usize_) < op_) { /*overflow?*/\
unsigned char *_sp = op;\
*op++ = 0xfe; \
op = T2(v8enc, _usize_)(ip, iplen, op); /*AS(op < op_, "#_V8DE overflow %u:%u,%u,%u\n", op - op_, 1+V8BOUND_(iplen, _usize_), op-_sp, v8len16(ip, iplen));*/\
if(op - _sp < _bs) goto a;\
op = _sp; /*restore*/\
}\
op = _sp;\
}\
if(op+(_b*iplen+7)/8+1 >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
if(op+_bs >= op_) { op = out; *op++ = 0xff; memcpy(op, in, _n_*(_usize_/8)); op += _n_*(_usize_/8); /*AS(op == op_+1, "#_V8DE overflow %u", op - op_);*/ goto e; }\
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, _b):\
T2(_bitpack_, _usize_)(ip, iplen, op, _b);\
a:ip += iplen; \
}\
return op - out;\
} /*AS(op <= op_, "#_V8DE overflow %u", op - op_);*/\
e: return op - out;\
}
#define _V8DE(in, n, out, _csize_, _usize_, _v8enc_, _bitd_, _bitpackv_, _bitpack_,_delta_) {\
if(!n) return 0;\
unsigned char *op = out, *op_ = out+n*(_usize_/8);\
#define _V8DE(in, _n_, out, _csize_, _usize_, _v8enc_, _bitd_, _bitpackv_, _bitpack_,_delta_) {\
if(!_n_) return 0;\
unsigned char *op = out, *op_ = out+_n_*(_usize_/8);\
start = *in++;\
T2(vbput, _usize_)(op, start); \
for(n--,ip = in; ip < in + n; ) { /*PREFETCH(ip+512,0);*/\
unsigned _b, _o, iplen = (in+n) - ip; iplen = min(iplen,_csize_);\
_o = T2(_bitd_, _usize_)(ip, iplen, &x, start); _b = T2(bsr,_usize_)(_o);\
if(_b > 8+_usize_/16) { \
T2(vbput, _usize_)(op, start);\
for(_n_--,ip = in; ip < in + _n_; ) { /*PREFETCH(ip+512,0);*/\
unsigned _b, _bs, _o, iplen = (in+_n_) - ip; iplen = min(iplen,_csize_);\
_o = T2(_bitd_, _usize_)(ip, iplen, &x, start); _b = T2(bsr,_usize_)(_o); \
_bs = (_b*iplen+7)/8+1;\
if(_b > 8+(_usize_/16)) { /*TurboByte < bitpacking?*/\
if(op+1+V8BOUND_(iplen, _usize_) < op_) { /*overflow?*/\
unsigned char *_sp = op;\
*op++ = 0xfe; op = T2(_v8enc_, _usize_)(ip, iplen, op, start); \
if(op - _sp < (_b*iplen+7)/8+1) { \
if(op >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
goto a; \
*op++ = 0xfe; \
op = T2(_v8enc_, _usize_)(ip, iplen, op, start); /*AS(op < op_, "#_V8DE overflow %u:%u,%u,%u\n", op - op_, 1+V8BOUND_(iplen, _usize_), op-_sp, v8len16(ip, iplen));*/\
if(op - _sp < _bs) goto a;\
op = _sp; /*restore*/\
}\
op = _sp;\
}\
if(op+(_b*iplen+7)/8+1 >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b):\
if(op+_bs >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (_n_+1)*(_usize_/8)); op += (_n_+1)*(_usize_/8); /*AS(op == op_+1, "#_V8DE overflow %u", op - op_);*/ goto e; }\
if(*op++ = _b) { op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b):\
T2(_bitpack_, _usize_)(ip, iplen, op, start, _b);\
a:ip += iplen;\
start = ip[-1];\
}\
return op - out;\
a: ip += iplen;\
start = ip[-1];\
} /*AS(op <= op_, "#_V8DE overflow %u", op - op_);*/\
e:return op - out;\
}
size_t v8nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start,x; _V8E( in, n, out, 128, 16, bit, bitpack, bitpack); }