Files
2023-03-23 11:29:37 +01:00

202 lines
6.8 KiB
C

/**
Copyright (C) powturbo 2019-2023
SPDX-License-Identifier: GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- email : powturbo [AT] gmail.com
- github : https://github.com/powturbo
- homepage : https://sites.google.com/site/powturbo/
- twitter : https://twitter.com/powturbo
**/
// Binary Interpolative Coding
// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf
// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598
#ifndef USIZE //---------- implementation --------------------------------------------------------------------------------------------------------------------------------------
#include "include_/conf.h"
#include "include_/bic.h"
#include "include_/bitutil_.h"
static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); }
size_t bicbound16(size_t n) { return n*2+4; }
size_t bicbound32(size_t n) { return n*4+4; }
//-- Simple binary ----------------------------------------------------------------------
#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/
#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_)
//------------------------------------------
#define BICENC_ bicbenc_
#define BICDEC_ bicbdec_
#define BICENC bicbenc
#define BICDEC bicbdec
//---- 16 bits ----------
#define USIZE 16
#define uint_t uint16_t
#include "bic.c"
//---- 32 bits ----------
#define USIZE 32
#define uint_t uint32_t
#include "bic.c"
#undef bicput
#undef bicget
#undef BICENC_
#undef BICDEC_
#undef BICENC
#undef BICDEC
// -- Leftmost minimal ---------------------------------------------------------------------
#define bicput(bw,br, _u_, _x_, _usize_) { \
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\
if(_x < hi) bitput(bw,br, _b, _x);\
else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\
}
#define bicget(bw,br, _u_, _x_, _usize_) {\
unsigned _u = _u_;\
unsigned _b = T2(__bsr,_usize_)(_u);\
uint_t _hi = (1ull << (_b + 1)) - _u - 1;\
if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\
else { \
unsigned _y = (bitbw(bw,br)>>_b)&1;\
bitrmv(bw,br,_b+1);\
_x_= (_x_<<1) + _y - _hi;\
}\
}
//--------------------------------------------
#define BICENC_ bicenc_
#define BICDEC_ bicdec_
#define BICENC bicenc
#define BICDEC bicdec
//---- 16 bits ----------
#define USIZE 16
#define uint_t uint16_t
#include "bic.c"
//---- 32 bits ----------
#define USIZE 32
#define uint_t uint32_t
#include "bic.c"
#undef bicput
#undef bicget
#undef BICENC_
#undef BICDEC_
#undef BICENC
#undef BICDEC
//-- Center Minimal -----------------------------------------------------
#define bicput(bw,br, _u_, _x_, _usize_) { \
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \
uint64_t _c = (1ull << (_b + 1)) - _u - 1; \
unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\
if(!(_u & 1)) _lo -= 1; \
_b += (_x <= _lo || _x >= _hi);\
bitput(bw,br, _b, _x);\
}
#define bicget(bw,br, _u_, _x_, _usize_) { \
unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\
uint64_t _c = (1ull << (_b + 1)) - _u - 1;\
unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\
_lo -= ((_u & 1) == 0);\
if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\
else bitget57(bw,br, _b+1, _x_);\
}
//--------------------------------------------
#define BICENC_ bicmenc_
#define BICDEC_ bicmdec_
#define BICENC bicmenc
#define BICDEC bicmdec
//---- 16 bits ----------
#define USIZE 16
#define uint_t uint16_t
#include "bic.c"
//---- 32 bits ----------
#define USIZE 32
#define uint_t uint32_t
#include "bic.c"
#else //-------------------- Template functions ----------------------------------------------------------------------------------------------------------
static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
while(n)
if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n");
unsigned x = in[h];
bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op);
T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br);
in += h+1; n -= h+1; lo = x+1; h = n >> 1;
} else break;
}
#define RE(a) //a // recursion : RE(a) a
#define RD(a) a // recursion : RD(a)
static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
RE(if(!n) return);
RD(do) {
if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal");
unsigned x;
bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE);
out[h] = (x += lo + h);
if(n != 1) {
T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br);
RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br));
RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1);
} RD(else break);
} else {
BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; //
RD(break);
}
} RD(while(n));
}
unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) {
if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]);
bitdef(bw,br);
unsigned char *op = out;
unsigned x = in[n-1];
ctou32(op) = x; op += 4;
T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br);
bitflush(bw,br,op);
return op - out;
}
unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) {
if(!n) return 0;
bitdef(bw,br);
unsigned char *ip = in;
unsigned x = ctou32(ip);
ip += 4;
out[n-1] = x;
T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br);
bitalign(bw,br,ip);
return ip - in;
}
#undef USIZE
#undef uint_t
#endif