202 lines
6.8 KiB
C
202 lines
6.8 KiB
C
/**
|
|
Copyright (C) powturbo 2019-2023
|
|
SPDX-License-Identifier: GPL v2 License
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along
|
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
|
|
- email : powturbo [AT] gmail.com
|
|
- github : https://github.com/powturbo
|
|
- homepage : https://sites.google.com/site/powturbo/
|
|
- twitter : https://twitter.com/powturbo
|
|
**/
|
|
// Binary Interpolative Coding
|
|
// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf
|
|
// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598
|
|
|
|
#ifndef USIZE //---------- implementation --------------------------------------------------------------------------------------------------------------------------------------
|
|
#include "include_/conf.h"
|
|
#include "include_/bic.h"
|
|
|
|
#include "include_/bitutil_.h"
|
|
|
|
static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); }
|
|
|
|
size_t bicbound16(size_t n) { return n*2+4; }
|
|
size_t bicbound32(size_t n) { return n*4+4; }
|
|
//-- Simple binary ----------------------------------------------------------------------
|
|
#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/
|
|
#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_)
|
|
|
|
//------------------------------------------
|
|
#define BICENC_ bicbenc_
|
|
#define BICDEC_ bicbdec_
|
|
#define BICENC bicbenc
|
|
#define BICDEC bicbdec
|
|
|
|
//---- 16 bits ----------
|
|
#define USIZE 16
|
|
#define uint_t uint16_t
|
|
#include "bic.c"
|
|
|
|
//---- 32 bits ----------
|
|
#define USIZE 32
|
|
#define uint_t uint32_t
|
|
#include "bic.c"
|
|
#undef bicput
|
|
#undef bicget
|
|
#undef BICENC_
|
|
#undef BICDEC_
|
|
#undef BICENC
|
|
#undef BICDEC
|
|
|
|
// -- Leftmost minimal ---------------------------------------------------------------------
|
|
#define bicput(bw,br, _u_, _x_, _usize_) { \
|
|
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\
|
|
if(_x < hi) bitput(bw,br, _b, _x);\
|
|
else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\
|
|
}
|
|
|
|
#define bicget(bw,br, _u_, _x_, _usize_) {\
|
|
unsigned _u = _u_;\
|
|
unsigned _b = T2(__bsr,_usize_)(_u);\
|
|
uint_t _hi = (1ull << (_b + 1)) - _u - 1;\
|
|
if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\
|
|
else { \
|
|
unsigned _y = (bitbw(bw,br)>>_b)&1;\
|
|
bitrmv(bw,br,_b+1);\
|
|
_x_= (_x_<<1) + _y - _hi;\
|
|
}\
|
|
}
|
|
|
|
//--------------------------------------------
|
|
#define BICENC_ bicenc_
|
|
#define BICDEC_ bicdec_
|
|
#define BICENC bicenc
|
|
#define BICDEC bicdec
|
|
|
|
//---- 16 bits ----------
|
|
#define USIZE 16
|
|
#define uint_t uint16_t
|
|
#include "bic.c"
|
|
|
|
//---- 32 bits ----------
|
|
#define USIZE 32
|
|
#define uint_t uint32_t
|
|
#include "bic.c"
|
|
#undef bicput
|
|
#undef bicget
|
|
#undef BICENC_
|
|
#undef BICDEC_
|
|
#undef BICENC
|
|
#undef BICDEC
|
|
|
|
//-- Center Minimal -----------------------------------------------------
|
|
#define bicput(bw,br, _u_, _x_, _usize_) { \
|
|
unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \
|
|
uint64_t _c = (1ull << (_b + 1)) - _u - 1; \
|
|
unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\
|
|
if(!(_u & 1)) _lo -= 1; \
|
|
_b += (_x <= _lo || _x >= _hi);\
|
|
bitput(bw,br, _b, _x);\
|
|
}
|
|
|
|
#define bicget(bw,br, _u_, _x_, _usize_) { \
|
|
unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\
|
|
uint64_t _c = (1ull << (_b + 1)) - _u - 1;\
|
|
unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\
|
|
_lo -= ((_u & 1) == 0);\
|
|
if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\
|
|
else bitget57(bw,br, _b+1, _x_);\
|
|
}
|
|
|
|
//--------------------------------------------
|
|
#define BICENC_ bicmenc_
|
|
#define BICDEC_ bicmdec_
|
|
#define BICENC bicmenc
|
|
#define BICDEC bicmdec
|
|
|
|
//---- 16 bits ----------
|
|
#define USIZE 16
|
|
#define uint_t uint16_t
|
|
#include "bic.c"
|
|
|
|
//---- 32 bits ----------
|
|
#define USIZE 32
|
|
#define uint_t uint32_t
|
|
#include "bic.c"
|
|
|
|
|
|
#else //-------------------- Template functions ----------------------------------------------------------------------------------------------------------
|
|
|
|
static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
|
|
while(n)
|
|
if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n");
|
|
unsigned x = in[h];
|
|
bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op);
|
|
T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br);
|
|
in += h+1; n -= h+1; lo = x+1; h = n >> 1;
|
|
} else break;
|
|
}
|
|
|
|
#define RE(a) //a // recursion : RE(a) a
|
|
#define RD(a) a // recursion : RD(a)
|
|
static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) {
|
|
RE(if(!n) return);
|
|
RD(do) {
|
|
if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal");
|
|
unsigned x;
|
|
bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE);
|
|
out[h] = (x += lo + h);
|
|
if(n != 1) {
|
|
T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br);
|
|
RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br));
|
|
RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1);
|
|
} RD(else break);
|
|
} else {
|
|
BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; //
|
|
RD(break);
|
|
}
|
|
} RD(while(n));
|
|
}
|
|
|
|
unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) {
|
|
if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]);
|
|
bitdef(bw,br);
|
|
unsigned char *op = out;
|
|
unsigned x = in[n-1];
|
|
|
|
ctou32(op) = x; op += 4;
|
|
T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br);
|
|
bitflush(bw,br,op);
|
|
return op - out;
|
|
}
|
|
|
|
unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) {
|
|
if(!n) return 0;
|
|
bitdef(bw,br);
|
|
unsigned char *ip = in;
|
|
unsigned x = ctou32(ip);
|
|
|
|
ip += 4;
|
|
out[n-1] = x;
|
|
T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br);
|
|
bitalign(bw,br,ip);
|
|
return ip - in;
|
|
}
|
|
|
|
#undef USIZE
|
|
#undef uint_t
|
|
#endif
|