Files
TurboPFor-Integer-Compression/vint.h
2017-01-02 23:30:16 +01:00

257 lines
17 KiB
C

/**
Copyright (C) powturbo 2013-2017
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- homepage : https://sites.google.com/site/powturbo/
- github : https://github.com/powturbo
- twitter : https://twitter.com/powturbo
- email : powturbo [_AT_] gmail [_DOT_] com
**/
// "Integer Compression" variable byte include header
#ifndef VINT_H
#define VINT_H
#include <stdint.h>
#include "conf.h"
#ifdef __cplusplus
extern "C" {
#endif
//----------------------------------- Variable byte: single value macros (low level) -----------------------------------------------
//------------- 32 bits -------------
extern unsigned char _vtab32_[];
#define _vbxvlen32(_x_) _vtab32_[(unsigned char)(_x_)>>4] // (clz32((_x_) ^ 0xff) - 23) //
#define _vbxlen32(_x_) ((bsr32(_x_|1)+6)/7)
#define _vbxput32(_op_, _x_, _act_) {\
if(likely((_x_) < (1<< 7))) { *_op_++ = _x_; _act_;}\
else if(likely((_x_) < (1<<14))) { ctou16(_op_) = bswap16((_x_) | 0x8000u); _op_ += 2; _act_;}\
else if(likely((_x_) < (1<<21))) { *_op_++ = _x_ >> 16 | 0xc0u; ctou16(_op_) = _x_; _op_ += 2; _act_;}\
else if(likely((_x_) < (1<<28))) { ctou32(_op_) = bswap32((_x_) | 0xe0000000u); _op_ += 4; _act_;}\
else { *_op_++ = (unsigned long long)(_x_) >> 32 | 0xf0u; ctou32(_op_) = _x_; _op_ += 4; _act_;}\
}
#define _vbxget32(_ip_, _x_, _act_) do { _x_ = (unsigned)(*_ip_++);\
if(!(_x_ & 0x80u)) { _act_;}\
else if(!(_x_ & 0x40u)) { _x_ = bswap16(ctou16(_ip_ - 1) & 0xff3fu); _ip_++; _act_;}\
else if(!(_x_ & 0x20u)) { _x_ = (_x_ & 0x1f)<<16 | ctou16(_ip_); _ip_ += 2; _act_;}\
else if(!(_x_ & 0x10u)) { _x_ = bswap32(ctou32(_ip_-1) & 0xffffff0fu); _ip_ += 3; _act_;}\
else { _x_ = (unsigned long long)((_x_) & 0x07)<<32 | ctou32(_ip_); _ip_ += 4; _act_;}\
} while(0)
//------------- 64 bits -----------
#define _vbxlen64(_x_) ((bsr64(_x_)+6)/7)
#define _vbxvlen64(_x_) ((_x_)==0xff?9:clz32((_x_) ^ 0xff) - 23)
#define _vbxput64(_op_, _x_, _act_) {\
if(likely(_x_ < (1<< 7))) { *_op_++ = _x_; _act_;}\
else if(likely(_x_ < (1<<14))) { ctou16(_op_) = bswap16(_x_| 0x8000); _op_ += 2; _act_;}\
else if(likely(_x_ < (1<<21))) { *_op_++ = _x_ >> 16 | 0xc0; ctou16(_op_) = _x_; _op_ += 2; _act_;}\
else if(likely(_x_ < (1<<28))) { ctou32(_op_) = bswap32(_x_| 0xe0000000); _op_ += 4; _act_;}\
else if( _x_ < 1ull<<35) { *_op_++ = _x_ >> 32 | 0xf0; ctou32(_op_) = _x_; _op_ += 4; _act_;}\
else if( _x_ < 1ull<<42) { ctou16(_op_) = bswap16(_x_ >> 32 | 0xf800); _op_ += 2; ctou32(_op_) = _x_; _op_ += 4; _act_;}\
else if( _x_ < 1ull<<49) { *_op_++ = _x_ >> 48 | 0xfc; ctou16(_op_) = _x_ >> 32; _op_ += 2; ctou32(_op_) = _x_; _op_ += 4; _act_;}\
else if( _x_ < 1ull<<56) { ctou64(_op_) = bswap64(_x_ | 0xfe00000000000000ull); _op_ += 8; _act_;}\
else { *_op_++ = 0xff; ctou64(_op_) = _x_; _op_ += 8; _act_;}\
}
#define _vbxget64(_ip_, _x_, _act_) do { _x_ = *_ip_++;\
if(!(_x_ & 0x80)) { _act_;}\
else if(!(_x_ & 0x40)) { _x_ = bswap16(ctou16(_ip_++-1) & 0xff3f); _act_;}\
else if(!(_x_ & 0x20)) { _x_ = (_x_ & 0x1f)<<16 | ctou16(_ip_); _ip_ += 2; _act_;}\
else if(!(_x_ & 0x10)) { _x_ = bswap32(ctou32(_ip_-1) & 0xffffff0f); _ip_ += 3; _act_;}\
else if(!(_x_ & 0x08)) { _x_ = (_x_ & 0x07)<<32 | ctou32(_ip_); _ip_ += 4; _act_;}\
else if(!(_x_ & 0x04)) { _x_ = (unsigned long long)(bswap16(ctou16(_ip_-1)) & 0x7ff) << 32 | ctou32(_ip_+1); _ip_ += 5; _act_;}\
else if(!(_x_ & 0x02)) { _x_ = (_x_ & 0x03)<<48 | (unsigned long long)ctou16(_ip_) << 32 | ctou32(_ip_+2); _ip_ += 6; _act_;}\
else if(!(_x_ & 0x01)) { _x_ = bswap64(ctou64(_ip_-1)) & 0x01ffffffffffffffull; _ip_ += 7; _act_;}\
else { _x_ = ctou64(_ip_); _ip_ += 8; _act_;}\
} while(0)
#define vbxput64(_op_, _x_) { unsigned long long _x = _x_; _vbxput64(_op_, _x, ;); }
#define vbxput32(_op_, _x_) { register unsigned _x = _x_; _vbxput32(_op_, _x, ;); }
#define vbxput16(_op_, _x_) vbxput32(_op_, _x_)
#define vbxget64(_ip_, _x_) _vbxget64(_ip_, _x_, ;)
#define vbxget32(_ip_, _x_) _vbxget32(_ip_, _x_, ;)
#define vbxget16(_ip_, _x_) vbxget32(_ip_,_x_)
//---------------------------------------------------------------------------
#define VB_SIZE 64
#define VB_MAX 254
#define VB_B2 6
#define VB_B3 3
#define VB_BA3 (VB_MAX - (VB_SIZE/8 - 3))
#define VB_BA2 (VB_BA3 - (1<<VB_B3))
#define VB_OFS1 (VB_BA2 - (1<<VB_B2))
#define VB_OFS2 (VB_OFS1 + (1 << (8+VB_B2)))
#define VB_OFS3 (VB_OFS2 + (1 << (16+VB_B3)))
#define _vblen32(_x_) ((_x_) < VB_OFS1?1:((_x_) < VB_OFS2?2:((_x_) < VB_OFS3)?3:(bsr32(_x_)+7)/8+1))
#define _vbvlen32(_x_) ((_x_) < VB_OFS1?1:((_x_) < VB_BA2?2:((_x_) < VB_BA3)?3:(_x_-VB_BA3)))
#define _vbput32(_op_, _x_, _act_) {\
if(likely((_x_) < VB_OFS1)){ *_op_++ = (_x_); _act_;}\
else if ((_x_) < VB_OFS2) { ctou16(_op_) = bswap16((VB_OFS1<<8)+((_x_)-VB_OFS1)); _op_ += 2; /*(_x_) -= VB_OFS1; *_op_++ = VB_OFS1 + ((_x_) >> 8); *_op_++ = (_x_);*/ _act_; }\
else if ((_x_) < VB_OFS3) { *_op_++ = VB_BA2 + (((_x_) -= VB_OFS2) >> 16); ctou16(_op_) = (_x_); _op_ += 2; _act_;}\
else { unsigned _b = (bsr32((_x_))+7)/8; *_op_++ = VB_BA3 + (_b - 3); ctou32(_op_) = (_x_); _op_ += _b; _act_;}\
}
#define _vbget32(_ip_, _x_, _act_) do { _x_ = *_ip_++;\
if(likely(_x_ < VB_OFS1)) { _act_ ;}\
else if(likely(_x_ < VB_BA2)) { _x_ = /*bswap16(ctou16(_ip_-1))*/ ((_x_<<8) + (*_ip_)) + (VB_OFS1 - (VB_OFS1 << 8)); _ip_++; _act_;} \
else if(likely(_x_ < VB_BA3)) { _x_ = ctou16(_ip_) + ((_x_ - VB_BA2 ) << 16) + VB_OFS2; _ip_ += 2; _act_;}\
else { unsigned _b = _x_-VB_BA3; _x_ = ctou32(_ip_) & ((1u << 8 * _b << 24) - 1); _ip_ += 3 + _b; _act_;}\
} while(0)
#define _vblen64(_x_) _vblen32(_x_)
#define _vbvlen64(_x_) _vbvlen32(_x_)
#define _vbput64(_op_, _x_, _act_) _vbput32(_op_, _x_, _act_)
#define _vbget64(_ip_, _x_, _act_) _vbget32(_ip_, _x_, _act_)
#ifdef _WIN32
//#define fgetc_unlocked(_f_) _fgetc_nolock(_f_)
#define fputc_unlocked(_c_, _f_) fputc(_c_,_f_)
#define fgetc_unlocked(_f_) fgetc(_f_)
#else
#define fputc_unlocked(_c_, _f_) _IO_putc_unlocked(_c_,_f_)
#define fgetc_unlocked(_f_) _IO_getc_unlocked(_f_)
#endif
#define leb128put(_op_, _x_) { typeof(_x_) _x = _x_; while(_x > 0x7f) { *_op_++ = _x & 0x7f; _x >>= 7; } *_op_++ = _x | 0x80; }
#define vbfput32(_f_, _x_) ({ typeof(_x_) _x = _x_; while(_x > 0x7f) { fputc_unlocked(_x & 0x7f, _f_); _x >>= 7; } fputc_unlocked(_x | 0x80, _f_); })
#define _leb128get(_ip_, _x_, _act_) { unsigned _sft=0; for(_x_=0;;_sft += 7) { unsigned _c = *_ip_++; _x_ += (_c & 0x7f) << _sft; if(_c >= 0x80) { _act_; break; } } }
#define leb128get(_ip_, _x_) vbgetax(_ip_, _x_, ;)
#define vbfget32(_f_ ) ({ unsigned _sft=0,_x=0; for(;;_sft += 7) { unsigned _c = fgetc_unlocked(_f_); if(_c != EOF) { _x += (_c & 0x7f) << _sft; if(_c & 0x80) break; } else { _x = EOF; break; } } _x; })
//------------- 16 bits -----------
#define _vblen16(_x_) _vblen32(_x_)
#define _vbvlen16(_x_) _vbvlen32(_x_)
#define _vbput16(_op_, _x_, _act_) _vbput32(_op_, _x_, _act_)
#define _vbget16(_ip_, _x_, _act_) _vbget32(_ip_, _x_, _act_)
//----------------------------------- Variable byte: single value functions -----------------------------------------------
// ---- Variable byte length after compression
static inline unsigned vblen16(unsigned short x) { return _vblen16(x); }
static inline unsigned vblen32(unsigned x) { return _vblen32(x); }
static inline unsigned vblen64(uint64_t x) { return _vblen64(x); }
// ---- Length of compressed value. Input in is the first char of the compressed buffer start (Ex. vbvlen32(in[0]) )
static inline unsigned vbvlen16(unsigned x) { return _vbvlen32(x); }
static inline unsigned vbvlen32(unsigned x) { return _vbvlen32(x); }
static inline unsigned vbvlen64(unsigned x) { return _vbvlen64(x); }
//----- encode/decode 16/32/64 single value and advance output/input pointer
#define vbput64(_op_, _x_) { unsigned long long _x = _x_; _vbput64(_op_, _x, ;); }
#define vbput32(_op_, _x_) { register unsigned _x = _x_; _vbput32(_op_, _x, ;); }
#define vbput16(_op_, _x_) vbput32(_op_, _x_)
#define vbget64(_ip_, _x_) _vbget64(_ip_, _x_, ;)
#define vbget32(_ip_, _x_) _vbget32(_ip_, _x_, ;)
#define vbget16(_ip_, _x_) vbget32(_ip_,_x_)
//----------------------------- Variable byte: array functions -----------------------------------------------------------------------
// Encoding/DEcoding: Return value = end of compressed output/input buffer out/in
//----------------------- Encoding/Decoding unsorted array with n integer values --------------------------
unsigned char *vbenc16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out);
unsigned char *vbenc32( unsigned *__restrict in, unsigned n, unsigned char *__restrict out);
unsigned char *vbenc64( uint64_t *__restrict in, unsigned n, unsigned char *__restrict out);
//-- Decode
unsigned char *vbdec16( unsigned char *__restrict in, unsigned n, unsigned short *__restrict out);
unsigned char *vbdec32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out);
unsigned char *vbdec64( unsigned char *__restrict in, unsigned n, uint64_t *__restrict out);
//-- Get value stored at index idx (idx:0...n-1)
unsigned short vbgetx16( unsigned char *__restrict in, unsigned idx);
unsigned vbgetx32( unsigned char *__restrict in, unsigned idx);
uint64_t vbgetx64( unsigned char *__restrict in, unsigned idx);
//-- Search and return index of next value equal to key or n when no key value found
// ex. unsigned idx;unsigned char *ip; for(idx=0,ip=in;;) { if((idx = vgeteq32(&ip, idx, 4321))>=n) break; printf("found at %u ", idx); }
unsigned vbgeteq16( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned short key);
unsigned vbgeteq32( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned key);
unsigned vbgeteq64( unsigned char **__restrict in, unsigned n, unsigned idx, uint64_t key);
//---------------------- Delta encoding/decoding sorted array ---------------------------------------------
//-- Increasing integer array. out[i] = out[i-1] + in[i]
unsigned char *vbdenc16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start);
unsigned char *vbdenc32( unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start);
unsigned char *vbdenc64( uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start);
unsigned char *vbddec16( unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start);
unsigned char *vbddec32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start);
unsigned char *vbddec64( unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
//-- Get value stored at index idx (idx:0...n-1)
unsigned short vbdgetx16( unsigned char *__restrict in, unsigned idx, unsigned short start);
unsigned vbdgetx32( unsigned char *__restrict in, unsigned idx, unsigned start);
uint64_t vbdgetx64( unsigned char *__restrict in, unsigned idx, uint64_t start);
//-- Search and return index of next value equal to key or n when no key value found
// ex. unsigned idx;unsigned char *ip; for(idx=0,ip=in;;) { if((idx = vgeteq32(&ip, idx, 4321))>=n) break; printf("found at %u ", idx); }
unsigned vbdgetgeq16( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned short *key, unsigned short start);
unsigned vbdgetgeq32( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned *key, unsigned start);
unsigned vbdgetgeq64( unsigned char **__restrict in, unsigned n, unsigned idx, uint64_t *key, uint64_t start);
//-- Strictly increasing (never remaining constant or decreasing) integer array. out[i] = out[i-1] + in[i] + 1
unsigned char *vbd1enc16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start);
unsigned char *vbd1enc32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start);
unsigned char *vbd1enc64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start);
unsigned char *vbd1dec16(unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start);
unsigned char *vbd1dec32(unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start);
unsigned char *vbd1dec64(unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
//-- Get value stored at index idx (idx:0...n-1)
unsigned short vbd1getx16( unsigned char *__restrict in, unsigned idx, unsigned short start);
unsigned vbd1getx32( unsigned char *__restrict in, unsigned idx, unsigned start);
uint64_t vbd1getx64( unsigned char *__restrict in, unsigned idx, uint64_t start);
//-- Search and return index of next value equal to key or n when no key value found
// ex. unsigned idx;unsigned char *ip; for(idx=0,ip=in;;) { if((idx = vgeteq32(&ip, idx, 4321))>=n) break; printf("found at %u ", idx); }
unsigned vbd1getgeq16( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned short *key, unsigned short start);
unsigned vbd1getgeq32( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned *key, unsigned start);
unsigned vbd1getgeq64( unsigned char **__restrict in, unsigned n, unsigned idx, uint64_t *key, uint64_t start);
//---------------------- Zigzag encoding/decoding for unsorted integer lists.
unsigned char *vbzenc16( unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start);
unsigned char *vbzenc32( unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start);
unsigned char *vbzenc64( uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, uint64_t start);
unsigned char *vbzdec16( unsigned char *__restrict in, unsigned n, unsigned short *__restrict out, unsigned short start);
unsigned char *vbzdec32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start);
unsigned char *vbzdec64( unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
//-- Get value stored at index idx (idx:0...n-1)
unsigned short vbzgetx16( unsigned char *__restrict in, unsigned idx, unsigned short start);
unsigned vbzgetx32( unsigned char *__restrict in, unsigned idx, unsigned start);
uint64_t vbzgetx64( unsigned char *__restrict in, unsigned idx, uint64_t start);
//-- Search and return index of next value equal to key or n when no key value found
// ex. unsigned idx;unsigned char *ip; for(idx=0,ip=in;;) { if((idx = vgeteq32(&ip, idx, 4321))>=n) break; printf("found at %u ", idx); }
/*unsigned vbzgeteq15( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned short key, unsigned start);
unsigned vbzgeteq16( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned short key, unsigned start);
unsigned vbzgeteq32( unsigned char **__restrict in, unsigned n, unsigned idx, unsigned key, unsigned start);
unsigned vbzgeteq64( unsigned char **__restrict in, unsigned n, unsigned idx, uint64_t key, unsigned start);*/
#ifdef __cplusplus
}
#endif
#endif