From f55022cdfe061dd45d52e31f116354ed924f889d Mon Sep 17 00:00:00 2001 From: x Date: Mon, 13 Mar 2023 10:40:37 +0100 Subject: [PATCH] TurboPFor: Bit Packing --- bic.c | 370 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 185 insertions(+), 185 deletions(-) diff --git a/bic.c b/bic.c index fe90753..ab7e883 100644 --- a/bic.c +++ b/bic.c @@ -1,185 +1,185 @@ -/** - Copyright (C) powturbo 2019-2023 - GPL v2 License - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - - - email : powturbo [AT] gmail.com - - github : https://github.com/powturbo - - homepage : https://sites.google.com/site/powturbo/ - - twitter : https://twitter.com/powturbo -**/ -// Binary Interpolative Coding -// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf -// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598 - -#ifndef USIZE -#include "include_/conf.h" -#include "include_/bic.h" - -#include "include_/bitutil_.h" - -static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); } - -// Simple binary -#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/ -#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) -#define BICENC_ bicbenc_ -#define BICDEC_ bicbdec_ -#define BICENC bicbenc -#define BICDEC bicbdec - -#define USIZE 16 -#define uint_t uint16_t -#include "bic.c" - -#define USIZE 32 -#define uint_t uint32_t -#include "bic.c" -#undef bicput -#undef bicget -#undef BICENC_ -#undef BICDEC_ -#undef BICENC -#undef BICDEC - -// Leftmost minimal -#define bicput(bw,br, _u_, _x_, _usize_) { \ - unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\ - if(_x < hi) bitput(bw,br, _b, _x);\ - else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\ -} - -#define bicget(bw,br, _u_, _x_, _usize_) {\ - unsigned _u = _u_;\ - unsigned _b = T2(__bsr,_usize_)(_u);\ - uint_t _hi = (1ull << (_b + 1)) - _u - 1;\ - if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\ - else { \ - unsigned _y = (bitbw(bw,br)>>_b)&1;\ - bitrmv(bw,br,_b+1);\ - _x_= (_x_<<1) + _y - _hi;\ - }\ -} -#define BICENC_ bicenc_ -#define BICDEC_ bicdec_ -#define BICENC bicenc -#define BICDEC bicdec - -#define USIZE 16 -#define uint_t uint16_t -#include "bic.c" - -#define USIZE 32 -#define uint_t uint32_t -#include "bic.c" -#undef bicput -#undef bicget -#undef BICENC_ -#undef BICDEC_ -#undef BICENC -#undef BICDEC - -// Center Minimal -#define bicput(bw,br, _u_, _x_, _usize_) { \ - unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \ - uint64_t _c = (1ull << (_b + 1)) - _u - 1; \ - unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\ - if(!(_u & 1)) _lo -= 1; \ - _b += (_x <= _lo || _x >= _hi);\ - bitput(bw,br, _b, _x);\ -} - -#define bicget(bw,br, _u_, _x_, _usize_) { \ - unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\ - uint64_t _c = (1ull << (_b + 1)) - _u - 1;\ - unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\ - _lo -= ((_u & 1) == 0);\ - if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\ - else bitget57(bw,br, _b+1, _x_);\ -} -#define BICENC_ bicmenc_ -#define BICDEC_ bicmdec_ -#define BICENC bicmenc -#define BICDEC bicmdec - -#define USIZE 16 -#define uint_t uint16_t -#include "bic.c" - -#define USIZE 32 -#define uint_t uint32_t -#include "bic.c" - -#else //-------------------- Template functions ---------------------------------------------------------------------------------------------------------- -static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) { - while(n) - if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n"); - unsigned x = in[h]; - bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op); - T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br); - in += h+1; n -= h+1; lo = x+1; h = n >> 1; - } else break; -} - -#define RE(a) //a // recursion : RE(a) a -#define RD(a) a // recursion : RD(a) -static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) { - RE(if(!n) return); - RD(do) { - if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal"); - unsigned x; - bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE); - out[h] = (x += lo + h); - if(n != 1) { - T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br); - RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br)); - RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1); - } RD(else break); - } else { - BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; // - RD(break); - } - } RD(while(n)); -} - -unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) { - if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]); - bitdef(bw,br); - unsigned char *op = out; - unsigned x = in[n-1]; - - ctou32(op) = x; op += 4; - T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br); - bitflush(bw,br,op); - return op - out; -} - -unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) { - if(!n) return 0; - bitdef(bw,br); - unsigned char *ip = in; - unsigned x = ctou32(ip); - - ip += 4; - out[n-1] = x; - T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br); - bitalign(bw,br,ip); - return ip - in; -} - -#undef USIZE -#undef uint_t -#endif +/** + Copyright (C) powturbo 2019-2023 + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + - email : powturbo [AT] gmail.com + - github : https://github.com/powturbo + - homepage : https://sites.google.com/site/powturbo/ + - twitter : https://twitter.com/powturbo +**/ +// Binary Interpolative Coding +// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf +// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598 + +#ifndef USIZE +#include "include_/conf.h" +#include "include_/bic.h" + +#include "include_/bitutil_.h" + +static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); } + +// Simple binary +#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/ +#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) +#define BICENC_ bicbenc_ +#define BICDEC_ bicbdec_ +#define BICENC bicbenc +#define BICDEC bicbdec + +#define USIZE 16 +#define uint_t uint16_t +#include "bic.c" + +#define USIZE 32 +#define uint_t uint32_t +#include "bic.c" +#undef bicput +#undef bicget +#undef BICENC_ +#undef BICDEC_ +#undef BICENC +#undef BICDEC + +// Leftmost minimal +#define bicput(bw,br, _u_, _x_, _usize_) { \ + unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\ + if(_x < hi) bitput(bw,br, _b, _x);\ + else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\ +} + +#define bicget(bw,br, _u_, _x_, _usize_) {\ + unsigned _u = _u_;\ + unsigned _b = T2(__bsr,_usize_)(_u);\ + uint_t _hi = (1ull << (_b + 1)) - _u - 1;\ + if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\ + else { \ + unsigned _y = (bitbw(bw,br)>>_b)&1;\ + bitrmv(bw,br,_b+1);\ + _x_= (_x_<<1) + _y - _hi;\ + }\ +} +#define BICENC_ bicenc_ +#define BICDEC_ bicdec_ +#define BICENC bicenc +#define BICDEC bicdec + +#define USIZE 16 +#define uint_t uint16_t +#include "bic.c" + +#define USIZE 32 +#define uint_t uint32_t +#include "bic.c" +#undef bicput +#undef bicget +#undef BICENC_ +#undef BICDEC_ +#undef BICENC +#undef BICDEC + +// Center Minimal +#define bicput(bw,br, _u_, _x_, _usize_) { \ + unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \ + uint64_t _c = (1ull << (_b + 1)) - _u - 1; \ + unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\ + if(!(_u & 1)) _lo -= 1; \ + _b += (_x <= _lo || _x >= _hi);\ + bitput(bw,br, _b, _x);\ +} + +#define bicget(bw,br, _u_, _x_, _usize_) { \ + unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\ + uint64_t _c = (1ull << (_b + 1)) - _u - 1;\ + unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\ + _lo -= ((_u & 1) == 0);\ + if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\ + else bitget57(bw,br, _b+1, _x_);\ +} +#define BICENC_ bicmenc_ +#define BICDEC_ bicmdec_ +#define BICENC bicmenc +#define BICDEC bicmdec + +#define USIZE 16 +#define uint_t uint16_t +#include "bic.c" + +#define USIZE 32 +#define uint_t uint32_t +#include "bic.c" + +#else //-------------------- Template functions ---------------------------------------------------------------------------------------------------------- +static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) { + while(n) + if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n"); + unsigned x = in[h]; + bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op); + T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br); + in += h+1; n -= h+1; lo = x+1; h = n >> 1; + } else break; +} + +#define RE(a) //a // recursion : RE(a) a +#define RD(a) a // recursion : RD(a) +static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) { + RE(if(!n) return); + RD(do) { + if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal"); + unsigned x; + bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE); + out[h] = (x += lo + h); + if(n != 1) { + T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br); + RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br)); + RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1); + } RD(else break); + } else { + BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; // + RD(break); + } + } RD(while(n)); +} + +unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) { + if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]); + bitdef(bw,br); + unsigned char *op = out; + unsigned x = in[n-1]; + + ctou32(op) = x; op += 4; + T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br); + bitflush(bw,br,op); + return op - out; +} + +unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) { + if(!n) return 0; + bitdef(bw,br); + unsigned char *ip = in; + unsigned x = ctou32(ip); + + ip += 4; + out[n-1] = x; + T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br); + bitalign(bw,br,ip); + return ip - in; +} + +#undef USIZE +#undef uint_t +#endif