TurboPFor: Bit Packing
This commit is contained in:
370
bic.c
370
bic.c
@ -1,185 +1,185 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2019-2023
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
- email : powturbo [AT] gmail.com
|
||||
- github : https://github.com/powturbo
|
||||
- homepage : https://sites.google.com/site/powturbo/
|
||||
- twitter : https://twitter.com/powturbo
|
||||
**/
|
||||
// Binary Interpolative Coding
|
||||
// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf
|
||||
// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598
|
||||
|
||||
#ifndef USIZE
|
||||
#include "include_/conf.h"
|
||||
#include "include_/bic.h"
|
||||
|
||||
#include "include_/bitutil_.h"
|
||||
|
||||
static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); }
|
||||
|
||||
// Simple binary
|
||||
#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/
|
||||
#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_)
|
||||
#define BICENC_ bicbenc_
|
||||
#define BICDEC_ bicbdec_
|
||||
#define BICENC bicbenc
|
||||
#define BICDEC bicbdec
|
||||
|
||||
#define USIZE 16
|
||||
#define uint_t uint16_t
|
||||
#include "bic.c"
|
||||
|
||||
#define USIZE 32
|
||||
#define uint_t uint32_t
|
||||
#include "bic.c"
|
||||
#undef bicput
|
||||
#undef bicget
|
||||
#undef BICENC_
|
||||
#undef BICDEC_
|
||||
#undef BICENC
|
||||
#undef BICDEC
|
||||
|
||||
// Leftmost minimal
|
||||
#define bicput(bw,br, _u_, _x_, _usize_) { \
|
||||
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\
|
||||
if(_x < hi) bitput(bw,br, _b, _x);\
|
||||
else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\
|
||||
}
|
||||
|
||||
#define bicget(bw,br, _u_, _x_, _usize_) {\
|
||||
unsigned _u = _u_;\
|
||||
unsigned _b = T2(__bsr,_usize_)(_u);\
|
||||
uint_t _hi = (1ull << (_b + 1)) - _u - 1;\
|
||||
if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\
|
||||
else { \
|
||||
unsigned _y = (bitbw(bw,br)>>_b)&1;\
|
||||
bitrmv(bw,br,_b+1);\
|
||||
_x_= (_x_<<1) + _y - _hi;\
|
||||
}\
|
||||
}
|
||||
#define BICENC_ bicenc_
|
||||
#define BICDEC_ bicdec_
|
||||
#define BICENC bicenc
|
||||
#define BICDEC bicdec
|
||||
|
||||
#define USIZE 16
|
||||
#define uint_t uint16_t
|
||||
#include "bic.c"
|
||||
|
||||
#define USIZE 32
|
||||
#define uint_t uint32_t
|
||||
#include "bic.c"
|
||||
#undef bicput
|
||||
#undef bicget
|
||||
#undef BICENC_
|
||||
#undef BICDEC_
|
||||
#undef BICENC
|
||||
#undef BICDEC
|
||||
|
||||
// Center Minimal
|
||||
#define bicput(bw,br, _u_, _x_, _usize_) { \
|
||||
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \
|
||||
uint64_t _c = (1ull << (_b + 1)) - _u - 1; \
|
||||
unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\
|
||||
if(!(_u & 1)) _lo -= 1; \
|
||||
_b += (_x <= _lo || _x >= _hi);\
|
||||
bitput(bw,br, _b, _x);\
|
||||
}
|
||||
|
||||
#define bicget(bw,br, _u_, _x_, _usize_) { \
|
||||
unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\
|
||||
uint64_t _c = (1ull << (_b + 1)) - _u - 1;\
|
||||
unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\
|
||||
_lo -= ((_u & 1) == 0);\
|
||||
if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\
|
||||
else bitget57(bw,br, _b+1, _x_);\
|
||||
}
|
||||
#define BICENC_ bicmenc_
|
||||
#define BICDEC_ bicmdec_
|
||||
#define BICENC bicmenc
|
||||
#define BICDEC bicmdec
|
||||
|
||||
#define USIZE 16
|
||||
#define uint_t uint16_t
|
||||
#include "bic.c"
|
||||
|
||||
#define USIZE 32
|
||||
#define uint_t uint32_t
|
||||
#include "bic.c"
|
||||
|
||||
#else //-------------------- Template functions ----------------------------------------------------------------------------------------------------------
|
||||
static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
|
||||
while(n)
|
||||
if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n");
|
||||
unsigned x = in[h];
|
||||
bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op);
|
||||
T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br);
|
||||
in += h+1; n -= h+1; lo = x+1; h = n >> 1;
|
||||
} else break;
|
||||
}
|
||||
|
||||
#define RE(a) //a // recursion : RE(a) a
|
||||
#define RD(a) a // recursion : RD(a)
|
||||
static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
|
||||
RE(if(!n) return);
|
||||
RD(do) {
|
||||
if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal");
|
||||
unsigned x;
|
||||
bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE);
|
||||
out[h] = (x += lo + h);
|
||||
if(n != 1) {
|
||||
T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br);
|
||||
RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br));
|
||||
RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1);
|
||||
} RD(else break);
|
||||
} else {
|
||||
BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; //
|
||||
RD(break);
|
||||
}
|
||||
} RD(while(n));
|
||||
}
|
||||
|
||||
unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) {
|
||||
if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]);
|
||||
bitdef(bw,br);
|
||||
unsigned char *op = out;
|
||||
unsigned x = in[n-1];
|
||||
|
||||
ctou32(op) = x; op += 4;
|
||||
T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br);
|
||||
bitflush(bw,br,op);
|
||||
return op - out;
|
||||
}
|
||||
|
||||
unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) {
|
||||
if(!n) return 0;
|
||||
bitdef(bw,br);
|
||||
unsigned char *ip = in;
|
||||
unsigned x = ctou32(ip);
|
||||
|
||||
ip += 4;
|
||||
out[n-1] = x;
|
||||
T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br);
|
||||
bitalign(bw,br,ip);
|
||||
return ip - in;
|
||||
}
|
||||
|
||||
#undef USIZE
|
||||
#undef uint_t
|
||||
#endif
|
||||
/**
|
||||
Copyright (C) powturbo 2019-2023
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
- email : powturbo [AT] gmail.com
|
||||
- github : https://github.com/powturbo
|
||||
- homepage : https://sites.google.com/site/powturbo/
|
||||
- twitter : https://twitter.com/powturbo
|
||||
**/
|
||||
// Binary Interpolative Coding
|
||||
// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf
|
||||
// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598
|
||||
|
||||
#ifndef USIZE
|
||||
#include "include_/conf.h"
|
||||
#include "include_/bic.h"
|
||||
|
||||
#include "include_/bitutil_.h"
|
||||
|
||||
static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); }
|
||||
|
||||
// Simple binary
|
||||
#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/
|
||||
#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_)
|
||||
#define BICENC_ bicbenc_
|
||||
#define BICDEC_ bicbdec_
|
||||
#define BICENC bicbenc
|
||||
#define BICDEC bicbdec
|
||||
|
||||
#define USIZE 16
|
||||
#define uint_t uint16_t
|
||||
#include "bic.c"
|
||||
|
||||
#define USIZE 32
|
||||
#define uint_t uint32_t
|
||||
#include "bic.c"
|
||||
#undef bicput
|
||||
#undef bicget
|
||||
#undef BICENC_
|
||||
#undef BICDEC_
|
||||
#undef BICENC
|
||||
#undef BICDEC
|
||||
|
||||
// Leftmost minimal
|
||||
#define bicput(bw,br, _u_, _x_, _usize_) { \
|
||||
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\
|
||||
if(_x < hi) bitput(bw,br, _b, _x);\
|
||||
else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\
|
||||
}
|
||||
|
||||
#define bicget(bw,br, _u_, _x_, _usize_) {\
|
||||
unsigned _u = _u_;\
|
||||
unsigned _b = T2(__bsr,_usize_)(_u);\
|
||||
uint_t _hi = (1ull << (_b + 1)) - _u - 1;\
|
||||
if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\
|
||||
else { \
|
||||
unsigned _y = (bitbw(bw,br)>>_b)&1;\
|
||||
bitrmv(bw,br,_b+1);\
|
||||
_x_= (_x_<<1) + _y - _hi;\
|
||||
}\
|
||||
}
|
||||
#define BICENC_ bicenc_
|
||||
#define BICDEC_ bicdec_
|
||||
#define BICENC bicenc
|
||||
#define BICDEC bicdec
|
||||
|
||||
#define USIZE 16
|
||||
#define uint_t uint16_t
|
||||
#include "bic.c"
|
||||
|
||||
#define USIZE 32
|
||||
#define uint_t uint32_t
|
||||
#include "bic.c"
|
||||
#undef bicput
|
||||
#undef bicget
|
||||
#undef BICENC_
|
||||
#undef BICDEC_
|
||||
#undef BICENC
|
||||
#undef BICDEC
|
||||
|
||||
// Center Minimal
|
||||
#define bicput(bw,br, _u_, _x_, _usize_) { \
|
||||
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \
|
||||
uint64_t _c = (1ull << (_b + 1)) - _u - 1; \
|
||||
unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\
|
||||
if(!(_u & 1)) _lo -= 1; \
|
||||
_b += (_x <= _lo || _x >= _hi);\
|
||||
bitput(bw,br, _b, _x);\
|
||||
}
|
||||
|
||||
#define bicget(bw,br, _u_, _x_, _usize_) { \
|
||||
unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\
|
||||
uint64_t _c = (1ull << (_b + 1)) - _u - 1;\
|
||||
unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\
|
||||
_lo -= ((_u & 1) == 0);\
|
||||
if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\
|
||||
else bitget57(bw,br, _b+1, _x_);\
|
||||
}
|
||||
#define BICENC_ bicmenc_
|
||||
#define BICDEC_ bicmdec_
|
||||
#define BICENC bicmenc
|
||||
#define BICDEC bicmdec
|
||||
|
||||
#define USIZE 16
|
||||
#define uint_t uint16_t
|
||||
#include "bic.c"
|
||||
|
||||
#define USIZE 32
|
||||
#define uint_t uint32_t
|
||||
#include "bic.c"
|
||||
|
||||
#else //-------------------- Template functions ----------------------------------------------------------------------------------------------------------
|
||||
static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
|
||||
while(n)
|
||||
if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n");
|
||||
unsigned x = in[h];
|
||||
bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op);
|
||||
T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br);
|
||||
in += h+1; n -= h+1; lo = x+1; h = n >> 1;
|
||||
} else break;
|
||||
}
|
||||
|
||||
#define RE(a) //a // recursion : RE(a) a
|
||||
#define RD(a) a // recursion : RD(a)
|
||||
static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
|
||||
RE(if(!n) return);
|
||||
RD(do) {
|
||||
if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal");
|
||||
unsigned x;
|
||||
bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE);
|
||||
out[h] = (x += lo + h);
|
||||
if(n != 1) {
|
||||
T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br);
|
||||
RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br));
|
||||
RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1);
|
||||
} RD(else break);
|
||||
} else {
|
||||
BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; //
|
||||
RD(break);
|
||||
}
|
||||
} RD(while(n));
|
||||
}
|
||||
|
||||
unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) {
|
||||
if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]);
|
||||
bitdef(bw,br);
|
||||
unsigned char *op = out;
|
||||
unsigned x = in[n-1];
|
||||
|
||||
ctou32(op) = x; op += 4;
|
||||
T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br);
|
||||
bitflush(bw,br,op);
|
||||
return op - out;
|
||||
}
|
||||
|
||||
unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) {
|
||||
if(!n) return 0;
|
||||
bitdef(bw,br);
|
||||
unsigned char *ip = in;
|
||||
unsigned x = ctou32(ip);
|
||||
|
||||
ip += 4;
|
||||
out[n-1] = x;
|
||||
T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br);
|
||||
bitalign(bw,br,ip);
|
||||
return ip - in;
|
||||
}
|
||||
|
||||
#undef USIZE
|
||||
#undef uint_t
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user