TurboPFor: Hybrid Turbobyte+TurboPack

This commit is contained in:
x
2023-03-13 10:40:37 +01:00
parent b08263af09
commit d401d90d39

View File

@ -44,53 +44,55 @@
// PAD8(_x_): _x_ divided by 8 and rounded up, using integer arithmetic (for non-negative _x_).
#define PAD8(_x_) ( (((_x_)+8-1)/8) ) #define PAD8(_x_) ( (((_x_)+8-1)/8) )
// 0-0x1f: bitpacking, 0xff = memcpy, 0xfe:varint // 0-0x1f: bitpacking, 0xff = memcpy, 0xfe:varint
// _V8E: body of a chunked encoder. It expands to a whole function body (it contains `return`).
// NOTE(review): every line of this macro carries two renderings of the same code, one spelling
// the count parameter `n` and one spelling it `_n_` - this looks like a side-by-side diff view.
// Parameters:
//   in, n/_n_   input array and element count; a count of 0 returns 0 immediately.
//   out         output byte buffer; op_ = out + count*(_usize_/8) is used as the size limit.
//   _csize_     maximum number of elements handled per loop iteration.
//   _usize_     element width in bits; _usize_/8 is used as the element size in bytes.
//   _bit_, _bitpackv_, _bitpack_
//               helper-name prefixes combined with _usize_ through T2
//               (T2 presumably pastes its two arguments into one identifier - confirm).
// The expansion also uses `ip` and `x`, which the enclosing function must declare.
// Per chunk of iplen = min(remaining, _csize_) elements:
//   1. _o = T2(_bit_,_usize_)(ip, iplen, &x) and _b = T2(bsr,_usize_)(_o)
//      (presumably the bit width needed for the chunk - confirm against the helpers).
//   2. If _b > 8+_usize_/16, a 0xfe tag plus the T2(v8enc,_usize_) output is tried and kept only
//      when it is shorter than (_b*iplen+7)/8+1 bytes; otherwise op is rewound to _sp.
//   3. If the bit-packed chunk would reach op_, the output restarts at `out` as a 0xff tag
//      followed by a memcpy of the input, and the loop is left.
//   4. Otherwise _b is stored as the tag byte and, when non-zero, the chunk is bit-packed
//      (the _bitpackv_ helper for full _csize_ chunks, the _bitpack_ helper otherwise).
// Returns op - out, the number of bytes written to out.
// NOTE(review): the two renderings differ in the 0xff fallback - one copies (n+1) elements from
// in-1, the other copies _n_ elements from in. This macro never advances `in`.
// NOTE(review): only the `_n_` rendering checks op+1+V8BOUND_(iplen,_usize_) < op_ before
// calling v8enc; the `n` rendering tests op >= op_ after the encoder has already written.
#define _V8E(in, n, out, _csize_, _usize_, _bit_, _bitpackv_, _bitpack_) { if(!n) return 0;\ #define _V8E(in, _n_, out, _csize_, _usize_, _bit_, _bitpackv_, _bitpack_) { if(!_n_) return 0;\
unsigned char *op = out, *op_ = out+n*(_usize_/8);\ unsigned char *op = out, *op_ = out+_n_*(_usize_/8);\
for(ip = in; ip < in + n; ) { PREFETCH(ip+512,0);\ for(ip = in; ip < in + _n_; ) { PREFETCH(ip+512,0);\
unsigned _o,_b, iplen = (in+n) - ip; iplen = min(iplen,_csize_);\ unsigned _b, _bs, _o, iplen = (in+_n_) - ip; iplen = min(iplen,_csize_);\
_o = T2(_bit_, _usize_)(ip, iplen, &x); \ _o = T2(_bit_, _usize_)(ip, iplen, &x); _b = T2(bsr,_usize_)(_o); \
_b = T2(bsr,_usize_)(_o);\ _bs = (_b*iplen+7)/8+1;\
if(_b > 8+_usize_/16) {\ if(_b > 8+(_usize_/16)) { /*TurboByte < bitpacking?*/\
unsigned char *_sp = op; \ if(op+1+V8BOUND_(iplen, _usize_) < op_) { /*overflow?*/\
*op++ = 0xfe; op = T2(v8enc, _usize_)(ip, iplen, op);\ unsigned char *_sp = op;\
if(op - _sp < (_b*iplen+7)/8+1) { \ *op++ = 0xfe; \
if(op >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\ op = T2(v8enc, _usize_)(ip, iplen, op); /*AS(op < op_, "#_V8DE overflow %u:%u,%u,%u\n", op - op_, 1+V8BOUND_(iplen, _usize_), op-_sp, v8len16(ip, iplen));*/\
goto a; \ if(op - _sp < _bs) goto a;\
op = _sp; /*restore*/\
}\ }\
op = _sp;\
}\ }\
if(op+(_b*iplen+7)/8+1 >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\ if(op+_bs >= op_) { op = out; *op++ = 0xff; memcpy(op, in, _n_*(_usize_/8)); op += _n_*(_usize_/8); /*AS(op == op_+1, "#_V8DE overflow %u", op - op_);*/ goto e; }\
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, _b):\ if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, _b):\
T2(_bitpack_, _usize_)(ip, iplen, op, _b);\ T2(_bitpack_, _usize_)(ip, iplen, op, _b);\
a:ip += iplen; \ a:ip += iplen; \
}\ } /*AS(op <= op_, "#_V8DE overflow %u", op - op_);*/\
return op - out;\ e: return op - out;\
} }
// _V8DE: variant of the chunked encoder body whose helpers take a running `start` value.
// It expands to a whole function body (it contains `return`).
// NOTE(review): every line of this macro carries two renderings of the same code, one spelling
// the count parameter `n` and one spelling it `_n_` - this looks like a side-by-side diff view.
// Parameters:
//   in, n/_n_   input array and element count; a count of 0 returns 0 immediately.
//   out         output byte buffer; op_ = out + count*(_usize_/8) is used as the size limit.
//   _csize_     maximum number of elements handled per loop iteration.
//   _usize_     element width in bits; _usize_/8 is used as the element size in bytes.
//   _v8enc_, _bitd_, _bitpackv_, _bitpack_
//               helper-name prefixes combined with _usize_ through T2
//               (T2 presumably pastes its two arguments into one identifier - confirm).
//   _delta_     not referenced anywhere in the body shown here.
// The expansion also uses `ip`, `x` and `start`, which the enclosing function must declare.
// Setup: the first element is consumed with start = *in++, handed to T2(vbput,_usize_)(op, start)
// (presumably a variable-byte write that advances op - confirm), and the count is decremented.
// Per chunk of iplen = min(remaining, _csize_) elements:
//   1. _o = T2(_bitd_,_usize_)(ip, iplen, &x, start) and _b = T2(bsr,_usize_)(_o).
//   2. If _b > 8+_usize_/16, a 0xfe tag plus the T2(_v8enc_,_usize_) output is tried and kept
//      only when it is shorter than (_b*iplen+7)/8+1 bytes; otherwise op is rewound to _sp.
//   3. If the bit-packed chunk would reach op_, the output restarts at `out` as a 0xff tag
//      followed by a memcpy of (count+1) elements from in-1, i.e. starting at the element that
//      was consumed during setup, and the loop is left.
//   4. Otherwise _b is stored as the tag byte and, when non-zero, the chunk is bit-packed
//      (the _bitpackv_ helper for full _csize_ chunks, the _bitpack_ helper otherwise).
//   5. ip advances by iplen and start becomes the last element of the chunk (ip[-1]).
// Returns op - out, the number of bytes written to out.
// NOTE(review): only the `_n_` rendering checks op+1+V8BOUND_(iplen,_usize_) < op_ before
// calling the _v8enc_ helper; the `n` rendering tests op >= op_ after the encoder has written.
#define _V8DE(in, n, out, _csize_, _usize_, _v8enc_, _bitd_, _bitpackv_, _bitpack_,_delta_) {\ #define _V8DE(in, _n_, out, _csize_, _usize_, _v8enc_, _bitd_, _bitpackv_, _bitpack_,_delta_) {\
if(!n) return 0;\ if(!_n_) return 0;\
unsigned char *op = out, *op_ = out+n*(_usize_/8);\ unsigned char *op = out, *op_ = out+_n_*(_usize_/8);\
start = *in++;\ start = *in++;\
T2(vbput, _usize_)(op, start); \ T2(vbput, _usize_)(op, start);\
for(n--,ip = in; ip < in + n; ) { /*PREFETCH(ip+512,0);*/\ for(_n_--,ip = in; ip < in + _n_; ) { /*PREFETCH(ip+512,0);*/\
unsigned _b, _o, iplen = (in+n) - ip; iplen = min(iplen,_csize_);\ unsigned _b, _bs, _o, iplen = (in+_n_) - ip; iplen = min(iplen,_csize_);\
_o = T2(_bitd_, _usize_)(ip, iplen, &x, start); _b = T2(bsr,_usize_)(_o);\ _o = T2(_bitd_, _usize_)(ip, iplen, &x, start); _b = T2(bsr,_usize_)(_o); \
if(_b > 8+_usize_/16) { \ _bs = (_b*iplen+7)/8+1;\
unsigned char *_sp = op;\ if(_b > 8+(_usize_/16)) { /*TurboByte < bitpacking?*/\
*op++ = 0xfe; op = T2(_v8enc_, _usize_)(ip, iplen, op, start); \ if(op+1+V8BOUND_(iplen, _usize_) < op_) { /*overflow?*/\
if(op - _sp < (_b*iplen+7)/8+1) { \ unsigned char *_sp = op;\
if(op >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\ *op++ = 0xfe; \
goto a; \ op = T2(_v8enc_, _usize_)(ip, iplen, op, start); /*AS(op < op_, "#_V8DE overflow %u:%u,%u,%u\n", op - op_, 1+V8BOUND_(iplen, _usize_), op-_sp, v8len16(ip, iplen));*/\
if(op - _sp < _bs) goto a;\
op = _sp; /*restore*/\
}\ }\
op = _sp;\
}\ }\
if(op+(_b*iplen+7)/8+1 >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (n+1)*(_usize_/8)); op += (n+1)*(_usize_/8); break; }\ if(op+_bs >= op_) { op = out; *op++ = 0xff; memcpy(op, in-1, (_n_+1)*(_usize_/8)); op += (_n_+1)*(_usize_/8); /*AS(op == op_+1, "#_V8DE overflow %u", op - op_);*/ goto e; }\
if(*op++ = _b) op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b):\ if(*op++ = _b) { op = iplen == _csize_?T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b):\
T2(_bitpack_, _usize_)(ip, iplen, op, start, _b);\ T2(_bitpack_, _usize_)(ip, iplen, op, start, _b);\
a:ip += iplen;\ }\
a: ip += iplen;\
start = ip[-1];\ start = ip[-1];\
}\ } /*AS(op <= op_, "#_V8DE overflow %u", op - op_);*/\
return op - out;\ e:return op - out;\
} }
// v8nenc16: encodes n uint16_t values from `in` into `out` by expanding _V8E with a chunk size
// of 128 elements, an element width of 16 bits, and the `bit` / `bitpack` helper prefixes
// (`bitpack` is passed for both the full-chunk and the partial-chunk packer).
// Returns the number of bytes written to out, or 0 when n is 0.
// NOTE(review): `start` is declared here but is not referenced by the _V8E expansion.
size_t v8nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start,x; _V8E( in, n, out, 128, 16, bit, bitpack, bitpack); } size_t v8nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start,x; _V8E( in, n, out, 128, 16, bit, bitpack, bitpack); }