From 4f51ccdf2af2b75672a53dfd4a7b50280f0410dd Mon Sep 17 00:00:00 2001 From: x Date: Fri, 10 Mar 2023 20:28:32 +0100 Subject: [PATCH] TurboPFor: Bit Packing --- bic.c | 185 +++++++++++++++++++++++++++++++++ bitpack.c | 306 +++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 406 insertions(+), 85 deletions(-) create mode 100644 bic.c diff --git a/bic.c b/bic.c new file mode 100644 index 0000000..7ccec66 --- /dev/null +++ b/bic.c @@ -0,0 +1,185 @@ +/** + Copyright (C) powturbo 2019-2023 + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + - email : powturbo [AT] gmail.com + - github : https://github.com/powturbo + - homepage : https://sites.google.com/site/powturbo/ + - twitter : https://twitter.com/powturbo +**/ +// Binary Interpolative Coding +// Reference: "On Implementing the Binary Interpolative Coding Algorithm" GIULIO ERMANNO PIBIRI, ISTI-CNS http://pages.di.unipi.it/pibiri/papers/BIC.pdf +// "Techniques for Inverted Index Compression" GIULIO ERMANNO PIBIRI, ROSSANO VENTURINI, University of Pisa https://arxiv.org/abs/1908.10598 + +#ifndef USIZE +#include "include_/conf.h" +#include "include_/bic.h" + +#include "include_/bitutil_.h" + +static ALWAYS_INLINE unsigned pow2next(unsigned x) { return x<2?1:(1ull << (__bsr32((x)-1)+1)); } + +// Simple binary +#define bicput(bw,br, _u_, _x_, _usize_) bitput( bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) /*AS(_u_ > 0, "Fatal bicput"); AS(_x_ <= _u_, "Fatal bicput2");*/ +#define bicget(bw,br, _u_, _x_, _usize_) bitget57(bw,br, T2(__bsr,_usize_)(_u_) + 1, _x_) +#define BICENC_ bicbenc_ +#define BICDEC_ bicbdec_ +#define BICENC bicbenc +#define BICDEC bicbdec + +#define USIZE 16 +#define uint_t uint16_t +#include "bic.c" + +#define USIZE 32 +#define uint_t uint32_t +#include "bic.c" +#undef bicput +#undef bicget +#undef BICENC_ +#undef BICDEC_ +#undef BICENC +#undef BICDEC + +// Leftmost minimal +#define bicput(bw,br, _u_, _x_, _usize_) { \ + unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u), hi = (1ull << (_b + 1)) - _u - 1;\ + if(_x < hi) bitput(bw,br, _b, _x);\ + else { _x += hi; bitput(bw,br, _b+1, (_x&1)<<_b | _x >> 1); }\ +} + +#define bicget(bw,br, _u_, _x_, _usize_) {\ + unsigned _u = _u_;\ + unsigned _b = T2(__bsr,_usize_)(_u);\ + uint_t _hi = (1ull << (_b + 1)) - _u - 1;\ + if((_x_ = bitpeek57(bw,br,_b)) < _hi) bitrmv(bw,br,_b);\ + else { \ + unsigned _y = (bitbw(bw,br)>>_b)&1;\ + bitrmv(bw,br,_b+1);\ + _x_= (_x_<<1) + _y - _hi;\ + }\ +} +#define BICENC_ bicenc_ +#define BICDEC_ bicdec_ +#define BICENC bicenc +#define BICDEC bicdec + +#define USIZE 16 +#define uint_t uint16_t +#include "bic.c" + +#define USIZE 32 +#define uint_t uint32_t +#include "bic.c" +#undef bicput +#undef bicget +#undef BICENC_ +#undef BICDEC_ +#undef BICENC +#undef BICDEC + +// Center Minimal +#define bicput(bw,br, _u_, _x_, _usize_) { \ + unsigned _x = _x_, _u = _u_, _b = T2(__bsr,_usize_)(_u); \ + uint64_t _c = (1ull << (_b + 1)) - _u - 1; \ + unsigned _c2 = _c >> 1, _r2 = _u >> 1, _lo = _r2-_c2, _hi = _r2+_c2+1;\ + if(!(_u & 1)) _lo -= 1; \ + _b += (_x <= _lo || _x >= _hi);\ + bitput(bw,br, _b, _x);\ +} + +#define bicget(bw,br, _u_, _x_, _usize_) { \ + unsigned _u = _u_, _b = T2(__bsr,_usize_)(_u);\ + uint64_t _c = (1ull << (_b + 1)) - _u - 1;\ + unsigned _c2 = _c>>1, _r2 = _u>>1, _lo = _r2 - _c2;\ + _lo -= ((_u & 1) == 0);\ + if((_x_ = bitpeek57(bw,br,_b)) > _lo) bitrmv(bw,br,_b);\ + else bitget57(bw,br, _b+1, _x_);\ +} +#define BICENC_ bicmenc_ +#define BICDEC_ bicmdec_ +#define BICENC bicmenc +#define BICDEC bicmdec + +#define USIZE 16 +#define uint_t uint16_t +#include "bic.c" + +#define USIZE 32 +#define uint_t uint32_t +#include "bic.c" + +#else //-------------------- Template functions ---------------------------------------------------------------------------------------------------------- +static void T2(BICENC_,USIZE)(uint_t *in, unsigned n, unsigned char **_op, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) { + while(n) + if(hi - lo + 1 != n) { //AC(lo <= hi,"bicenc fatal lo=%d>hi=%d n=%d\n", lo, hi, n); AS(hi - lo >= n - 1, "bicenc_32 fatal hi-lo>n-1\n"); + unsigned x = in[h]; + bicput(*bw, *br, hi-n-lo+1, x-lo-h, USIZE); bitenorm(*bw,*br,*_op); + T2(BICENC_,USIZE)( in, h, _op, lo, x-1, h>>1, bw,br); + in += h+1; n -= h+1; lo = x+1; h = n >> 1; + } else break; +} + +#define RE(a) //a // recursion : RE(a) a +#define RD(a) a // recursion : RD(a) +static void T2(BICDEC_,USIZE)(unsigned char **_ip, unsigned n, uint_t *out, unsigned lo, unsigned hi, unsigned h, uint64_t *bw, unsigned *br) { + RE(if(!n) return); + RD(do) { + if(likely(hi - lo + 1 != n)) { //AS(lo <= hi, "bicdec fatal"); + unsigned x; + bitdnorm(*bw,*br,*_ip); bicget(*bw,*br, hi-lo+1-n, x, USIZE); + out[h] = (x += lo + h); + if(n != 1) { + T2(BICDEC_,USIZE)(_ip, h, out, lo, x-1, h>>1, bw,br); + RE(T2(BICDEC_,USIZE)(_ip,n- h-1, out+ h+1, x+1, hi, (n-h-1)>>1, bw,br)); + RD( n-=h+1; out+=h+1; lo=x+1; h = n>>1); + } RD(else break); + } else { + BITFORSET_(out, n, lo, 1); //for(unsigned i = 0; i != n; ++i) out[i] = lo+i; // + RD(break); + } + } RD(while(n)); +} + +unsigned T2(BICENC,USIZE)(uint_t *in, unsigned n, unsigned char *out) { + if(!n) return 0; //for(unsigned i = 1; i < n; i++) { AC(in[i]>in[i-1], "bicenc32: Not sorted at=%u,count=%d\n", i, n); } //printf("n=%u ", n);printf("%u,", in[i]); + bitdef(bw,br); + unsigned char *op = out; + unsigned x = in[n-1]; + + ctou32(op) = x; op += 4; + T2(BICENC_,USIZE)(in, n-1, &op, 0, x, pow2next(n)>>1, &bw,&br); + bitflush(bw,br,op); + return op - out; +} + +unsigned T2(BICDEC,USIZE)(unsigned char *in, unsigned n, uint_t *out) { + if(!n) return 0; + bitdef(bw,br); + unsigned char *ip = in; + unsigned x = ctou32(ip); + + ip += 4; + out[n-1] = x; + T2(BICDEC_,USIZE)(&ip, n-1, out, 0, x, pow2next(n)>>1, &bw,&br); + bitalign(bw,br,ip); + return ip - in; +} + +#undef USIZE +#undef uint_t +#endif diff --git a/bitpack.c b/bitpack.c index 0aa9641..dcddf27 100644 --- a/bitpack.c +++ b/bitpack.c @@ -1,5 +1,5 @@ /** - Copyright (C) powturbo 2013-2019 + Copyright (C) powturbo 2013-2023 GPL v2 License This program is free software; you can redistribute it and/or modify @@ -23,13 +23,18 @@ **/ // "Integer Compression" bit packing +#pragma warning( disable : 4005) +#pragma warning( disable : 4090) +#pragma warning( disable : 4068) + #include -#define BITUTIL_IN -#define VINT_IN -#include "conf.h" -#include "bitutil.h" -#include "vint.h" -#include "bitpack.h" +#include "include_/conf.h" +#include "include_/bitpack.h" +#include "include_/bitutil.h" + +#include "include_/vlcbyte.h" +#include "include_/bitutil_.h" + #define PAD8(_x_) ( (((_x_)+8-1)/8) ) #ifdef __ARM_NEON @@ -38,13 +43,10 @@ #define PREFETCH(_ip_,_rw_) __builtin_prefetch(_ip_,_rw_) #endif -#pragma warning( disable : 4005) -#pragma warning( disable : 4090) -#pragma warning( disable : 4068) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunsequenced" -#if !defined(SSE2_ON) && !defined(AVX2_ON) //----------------------------------- Plain ----------------------------------------------------------------------- +#ifndef __AVX2__ //----------------------------------- Plain ----------------------------------------------------------------------- typedef unsigned char *(*BITPACK_F8)( uint8_t *__restrict out, unsigned n, const unsigned char *__restrict in); typedef unsigned char *(*BITPACK_D8)( uint8_t *__restrict out, unsigned n, const unsigned char *__restrict in, uint8_t start); typedef unsigned char *(*BITPACK_F16)(uint16_t *__restrict out, unsigned n, const unsigned char *__restrict in); @@ -72,6 +74,7 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define IPI(_ip_) _ip_ += 32 #endif +//---- bitpack --------------- #define IP9(_ip_,_x_, _parm_) #define IPW(_ip_,_x_) VX #define IPX(_ip_,_x_) (V = IP(_ip_,_x_)) @@ -89,11 +92,12 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #undef IP32 #undef IP64 +//----- bitpack delta -------------- #define DELTA -#define IP9(_ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start; start = IP(_ip_,_x_) -#define IPV(_ip_,_x_) VX -#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start) +#define IP9(_ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start; start = IP(_ip_,_x_) +#define IPV(_ip_,_x_) VX +#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start) #define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_) @@ -106,6 +110,7 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #undef IP32 #undef IP64 +//----- bitpack FOR --------------- #define IP9(_ip_,_x_, _parm_) #define IPV(_ip_,_x_) IP(_ip_,_x_) - start #define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start) @@ -121,9 +126,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #undef IP32 #undef IP64 -#define IP9( _ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start - 1; start = IP(_ip_,_x_) -#define IPV( _ip_,_x_) VX -#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start - 1) +//----- bitpack delta 1 ----------- +#define IP9( _ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start - 1; start = IP(_ip_,_x_) +#define IPV( _ip_,_x_) VX +#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start - 1) #define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_) @@ -143,9 +149,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define _BITPACK_ bitepack #include "bitpack_.h"*/ -#define IP9(_ip_,_x_, _parm_) V = TEMPLATE2(zigzagenc, USIZE)(IP(_ip_,_x_) - start); start = IP(_ip_,_x_) +//------ bitpack zigzag -------------------- +#define IP9(_ip_,_x_, _parm_) V = T2(zigzagenc, USIZE)(IP(_ip_,_x_) - start); start = IP(_ip_,_x_) #define IPV(_ip_,_x_) VX -#define IPX(_ip_,_x_) (V = TEMPLATE2(zigzagenc, USIZE)(IP(_ip_,_x_) - start)) +#define IPX(_ip_,_x_) (V = T2(zigzagenc, USIZE)(IP(_ip_,_x_) - start)) #define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_) @@ -158,6 +165,23 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #undef IP32 #undef IP64 +//------ bitpack xor -------------------- +#define IP9(_ip_,_x_, _parm_) V = IP(_ip_,_x_) ^ start; start = IP(_ip_,_x_) +#define IPV(_ip_,_x_) VX +#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) ^ start) +#define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_) +#define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_) +#define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_) +#define _BITPACK_ bitxpack +#include "bitpack_.h" +#undef IP9 +#undef IPV +#undef IPX +#undef IP16 +#undef IP32 +#undef IP64 + +//----- bitpack FOR 1 --------------------- #define IPI(_ip_) _ip_ += 32; start += 32 #define IP9(_ip_,_x_, _parm_) #define IPV(_ip_,_x_) (IP(_ip_,_x_) - start - (_x_) - 1) @@ -175,14 +199,14 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #undef IP32 #undef IP64 +//----------------------------------- bitnpack ---------------------------------------------------- #define BITNPACK(in, n, out, _csize_, _usize_) { unsigned char *op = out;\ for(ip = in, in += n; ip < in;) { \ - TEMPLATE3(uint, _usize_, _t) o,x;\ + T3(uint, _usize_, _t) o,x;\ unsigned iplen = in - ip,b; \ - if(iplen > _csize_) iplen = _csize_;\ - PREFETCH(ip+512,0);\ - o = TEMPLATE2(bit,_usize_)(ip, iplen, &x); b = TEMPLATE2(bsr,_usize_)(o);\ - *op++ = b; op = TEMPLATE2(bitpacka, _usize_)[b](ip, iplen, op);\ + if(iplen > _csize_) iplen = _csize_; PREFETCH(ip+512,0);\ + o = T2(bit,_usize_)(ip, iplen, &x); b = T2(bsr,_usize_)(o);\ + *op++ = b; op = T2(bitpacka, _usize_)[b](ip, iplen, op);\ ip += iplen;\ }\ return op - out;\ @@ -190,14 +214,15 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define BITNDPACK(in, n, out, _csize_, _usize_, _bitd_, _bitpacka_) { if(!n) return 0;\ unsigned char *op = out; \ - TEMPLATE3(uint, _usize_, _t) o,x;\ + T3(uint, _usize_, _t) o,x;\ start = *in++; \ - TEMPLATE2(vbxput, _usize_)(op, start);\ - for(n--,ip = in; ip != in + (n&~(_csize_-1)); ) { unsigned b; PREFETCH(ip+512,0);\ - o = TEMPLATE2(_bitd_, _usize_)(ip, _csize_, &x, start); b = TEMPLATE2(bsr,_usize_)(o); *op++ = b; op = TEMPLATE2(_bitpacka_,_usize_)[b](ip, _csize_, op, start); ip += _csize_; start = ip[-1];\ + T2(vbxput, _usize_)(op, start);\ + for(n--,ip = in; ip != in + (n&~(_csize_-1)); ) { \ + unsigned b; PREFETCH(ip+512,0);\ + o = T2(_bitd_, _usize_)(ip, _csize_, &x, start); b = T2(bsr,_usize_)(o); *op++ = b; op = T2(_bitpacka_,_usize_)[b](ip, _csize_, op, start); ip += _csize_; start = ip[-1];\ }\ if(n&=(_csize_-1)) { unsigned b;\ - o = TEMPLATE2(_bitd_, _usize_)(ip, n, &x, start); b = TEMPLATE2(bsr,_usize_)(o); *op++ = b; op = TEMPLATE2(_bitpacka_,_usize_)[b](ip, n, op, start);\ + o = T2(_bitd_, _usize_)(ip, n, &x, start); b = T2(bsr,_usize_)(o); *op++ = b; op = T2(_bitpacka_,_usize_)[b](ip, n, op, start);\ }\ return op - out;\ } @@ -222,28 +247,33 @@ size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitz, bitzpacka); } size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitz, bitzpacka); } +size_t bitnxpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitx, bitxpacka); } +size_t bitnxpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitx, bitxpacka); } +size_t bitnxpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitx, bitxpacka); } +size_t bitnxpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitx, bitxpacka); } + size_t bitnfpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitf, bitfpacka); } size_t bitnfpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitf, bitfpacka); } size_t bitnfpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitf, bitfpacka); } size_t bitnfpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitf, bitfpacka); } - -#else //--------------------------------------- SIMD ---------------------------------------------------------------------------------------------- + #endif +//#else //--------------------------------------- SIMD ---------------------------------------------------------------------------------------------- #define _BITNPACKV(in, n, out, _csize_, _usize_, _bitpackv_) {\ - unsigned char *op = out; TEMPLATE3(uint, _usize_, _t) _o,_x;\ + unsigned char *op = out; T3(uint, _usize_, _t) _o,_x;\ for(ip = in; ip != in + (n&~(_csize_-1)); ip += _csize_) { PREFETCH(ip+512,0);\ - unsigned _b; _o = TEMPLATE2(bit,_usize_)(ip, _csize_, &_x); _b = TEMPLATE2(bsr,_usize_)(_o); *op++ = _b; op = TEMPLATE2(_bitpackv_, _usize_)(ip, _csize_, op, _b);\ - } if(n&=(_csize_-1)) { unsigned _b; _o = TEMPLATE2(bit,_usize_)(ip, n, &_x); _b = TEMPLATE2(bsr,_usize_)(_o); *op++ = _b; op = TEMPLATE2(bitpack, _usize_)(ip, n, op, _b); }\ + unsigned _b; _o = T2(bit,_usize_)(ip, _csize_, &_x); _b = T2(bsr,_usize_)(_o); *op++ = _b; op = T2(_bitpackv_, _usize_)(ip, _csize_, op, _b);\ + } if(n&=(_csize_-1)) { unsigned _b; _o = T2(bit,_usize_)(ip, n, &_x); _b = T2(bsr,_usize_)(_o); *op++ = _b; op = T2(bitpack, _usize_)(ip, n, op, _b); }\ return op - out;\ } #define _BITNDPACKV(in, n, out, _csize_, _usize_, _bitdv_, _bitpackv_, _bitd_, _bitpack_) { if(!n) return 0;\ - unsigned char *op = out; TEMPLATE3(uint, _usize_, _t) _o,_x;\ + unsigned char *op = out; T3(uint, _usize_, _t) _o,_x;\ start = *in++; \ - TEMPLATE2(vbxput, _usize_)(op, start);\ + T2(vbxput, _usize_)(op, start);\ for(n--,ip = in; ip != in + (n&~(_csize_-1)); ) { PREFETCH(ip+512,0);\ - unsigned _b; _o = TEMPLATE2(_bitdv_, _usize_)(ip, _csize_, &_x, start); _b = TEMPLATE2(bsr,_usize_)(_o); *op++ = _b; op = TEMPLATE2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b); ip += _csize_; start = ip[-1];\ - } if(n&=(_csize_-1)) { unsigned _b; _o = TEMPLATE2(_bitd_, _usize_)(ip, n, &_x, start); _b = TEMPLATE2(bsr,_usize_)(_o); *op++ = _b; op = TEMPLATE2(_bitpack_, _usize_)(ip, n, op, start, _b); }\ + unsigned _b; _o = T2(_bitdv_, _usize_)(ip, _csize_, &_x, start); _b = T2(bsr,_usize_)(_o); *op++ = _b; op = T2(_bitpackv_, _usize_)(ip, _csize_, op, start, _b); ip += _csize_; start = ip[-1];\ + } if(n&=(_csize_-1)) { unsigned _b; _o = T2(_bitd_, _usize_)(ip, n, &_x, start); _b = T2(bsr,_usize_)(_o); *op++ = _b; op = T2(_bitpack_, _usize_)(ip, n, op, start, _b); }\ return op - out;\ } @@ -258,63 +288,88 @@ size_t bitnfpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict #define OPPE(__op) #define IPPE(__op) +//--- bitpack --------------- #define VI32(ip, i, iv, parm) #define IP32(ip, i, iv) _mm256_loadu_si256(ip++) -unsigned char *bitpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(256*b); BITPACK256V32(in, b, out, 0); return pout; } +unsigned char *bitpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(256*b); BITPACK256V32(in, b, out, 0); return pout; } #undef VI32 #undef IP32 - +//-- bipack FOR ------------ #define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm256_sub_epi32(_mm256_loadu_si256(_ip_++),sv) #define IP32(_ip_, i, _iv_) _iv_ #include "bitpack_.h" -unsigned char *bitfpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b); - __m256i v, sv = _mm256_set1_epi32(start); +unsigned char *bitfpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(256*b); + __m256i sv = _mm256_set1_epi32(start), v; BITPACK256V32(in, b, out, sv); return pout; } #define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm256_sub_epi32(_mm256_loadu_si256(_ip_++),_sv_); _sv_ = _mm256_add_epi32(_sv_,cv); #define IP32(ip, i, _iv_) _iv_ -unsigned char *bitf1pack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b); - __m256i v, sv = _mm256_set_epi32(start+8,start+7,start+6,start+5,start+4,start+3,start+2,start+1), cv = _mm256_set1_epi32(8); - BITPACK256V32(in, b, out, sv); return pout; +unsigned char *bitf1pack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(256*b); + __m256i v, sv = _mm256_set_epi32(start+8,start+7,start+6,start+5,start+4,start+3,start+2,start+1), + cv = _mm256_set1_epi32(8); + BITPACK256V32(in, b, out, sv); + return pout; } +//-- bitpack delta ----------- #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = mm256_delta_epi32(v,_sv_); _sv_ = v #define IP32(ip, i, _iv_) _iv_ #include "bitpack_.h" -unsigned char *bitdpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b); +unsigned char *bitdpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(256*b); __m256i v,sv = _mm256_set1_epi32(start); BITPACK256V32(in, b, out, sv); return pout; } +//-- bitpack delta 1 -------- #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = _mm256_sub_epi32(mm256_delta_epi32(v,_sv_),cv); _sv_ = v -unsigned char *bitd1pack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b); - __m256i v, sv = _mm256_set1_epi32(start), cv = _mm256_set1_epi32(1); +unsigned char *bitd1pack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(256*b); + __m256i sv = _mm256_set1_epi32(start), v, + cv = _mm256_set1_epi32(1); BITPACK256V32(in, b, out, sv); return pout; } +//-- bitpack zigzag ----------- #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = mm256_delta_epi32(v,_sv_); _sv_ = v; _iv_ = mm256_zzage_epi32(_iv_) -unsigned char *bitzpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b); - __m256i v, sv = _mm256_set1_epi32(start), cv = _mm256_set1_epi32(1); +unsigned char *bitzpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(256*b); + __m256i sv = _mm256_set1_epi32(start), v, + cv = _mm256_set1_epi32(1); BITPACK256V32(in, b, out, sv); return pout; } -size_t bitnpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; _BITNPACKV( in, n, out, 256, 32, bitpack256v); } -size_t bitndpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd, bitdpack256v, bitd, bitdpack); } -size_t bitnd1pack256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd1, bitd1pack256v,bitd1, bitd1pack); } -size_t bitnzpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitz, bitzpack256v, bitz, bitzpack); } -size_t bitnfpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitf, bitfpack256v, bitf, bitfpack); } +//-- bitpack xor ----------- +#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = mm256_xore_epi32(v,_sv_); _sv_ = v; +unsigned char *bitxpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(256*b); + __m256i sv = _mm256_set1_epi32(start), v; + BITPACK256V32(in, b, out, sv); + return pout; +} - #elif defined(__SSE2__) || defined(__ARM_NEON) //----------------------------- SSE --------------------------------------------------------------- +//--------------------------------------------------- bitnpack -------------------------------------------------------------------------------------------------- +size_t bitnpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; _BITNPACKV( in, n, out, 256, 32, bitpack256v); } +size_t bitndpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd256v, bitdpack256v, bitd, bitdpack); } +size_t bitnd1pack256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitd1256v, bitd1pack256v,bitd1, bitd1pack); } +size_t bitnzpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitz256v, bitzpack256v, bitz, bitzpack); } +size_t bitnfpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitf, bitfpack256v, bitf, bitfpack); } +size_t bitnxpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 256, 32, bitx256v, bitxpack256v, bitx, bitxpack); } + + #elif defined(__SSE3__) || defined(__ARM_NEON) //----------------------------- SSE --------------------------------------------------------------- #define OPPE(__op) #define IPPE(__op) +//-- bitpack ------- #define VI16(ip, i, iv, parm) #define VI32(ip, i, iv, parm) #define IP16(_ip_, i, iv) _mm_loadu_si128(_ip_++) @@ -322,8 +377,15 @@ size_t bitnfpack256v32( uint32_t *__restrict in, size_t n, unsigned char *__rest #include "bitpack_.h" unsigned char *bitpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V16(in, b, out, 0); return pout; } unsigned char *bitpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; } -unsigned char *bitpack256w32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *_out=out; unsigned *_in=in; -BITPACK128V32(in, b, out, 0); in = _in+128; out = _out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return _out+PAD8(256*b); } +unsigned char *bitpack256w32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { + unsigned char *_out = out; + unsigned *_in = in; + BITPACK128V32(in, b, out, 0); + in = _in+128; + out = _out+PAD8(128*b); + BITPACK128V32(in, b, out, 0); + return _out+PAD8(256*b); +} #ifdef __ARM_NEON //#define IP32(_ip_, i, iv) _mm_or_si128(_mm_shuffle_epi32( _mm_loadu_si128(_ip_++),_MM_SHUFFLE(3, 1, 2, 0)), _mm_shuffle_epi32( _mm_loadu_si128(_ip_++),_MM_SHUFFLE(2, 0, 3, 1)) ) @@ -333,75 +395,146 @@ BITPACK128V32(in, b, out, 0); in = _in+128; out = _out+PAD8(128*b); BITPACK128V3 #endif #include "bitpack_.h" unsigned char *bitpack128v64(uint64_t *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { - if(b<=32) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; } else return bitpack64(in,n,out,b); + if(b <= 32) { + unsigned char *pout = out+PAD8(128*b); + BITPACK128V32(in, b, out, 0); + return pout; + } else return bitpack64(in,n,out,b); } +//-- bitpack delta ------------ #define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = mm_delta_epi16(v,_sv_); _sv_ = v #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = mm_delta_epi32(v,_sv_); _sv_ = v #define IP16(ip, i, _iv_) _iv_ #define IP32(ip, i, _iv_) _iv_ #include "bitpack_.h" -unsigned char *bitdpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v,sv = _mm_set1_epi16(start); BITPACK128V16(in, b, out, sv); return pout; +unsigned char *bitdpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi16(start), v; + BITPACK128V16(in, b, out, sv); + return pout; } -unsigned char *bitdpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v,sv = _mm_set1_epi32(start); BITPACK128V32(in, b, out, sv); return pout; +unsigned char *bitdpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi32(start), v; + BITPACK128V32(in, b, out, sv); + return pout; } +//-- bitpack FOR ---------- #define VI16(_ip_, _i_, _iv_, _sv_) #define VI32(_ip_, _i_, _iv_, _sv_) #define IP16(_ip_, i, _iv_) _mm_sub_epi16(_mm_loadu_si128(_ip_++),sv) #define IP32(_ip_, i, _iv_) _mm_sub_epi32(_mm_loadu_si128(_ip_++),sv) #include "bitpack_.h" -unsigned char *bitfpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi16(start); BITPACK128V16(in, b, out, sv); return pout; +unsigned char *bitfpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi16(start), v; + BITPACK128V16(in, b, out, sv); + return pout; } -unsigned char *bitfpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi32(start); BITPACK128V32(in, b, out, sv); return pout; +unsigned char *bitfpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi32(start), v; + BITPACK128V32(in, b, out, sv); + return pout; } +//-- bitpack delta 1 ------------------------- #define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi16(mm_delta_epi16(v,_sv_),cv); _sv_ = v #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi32(mm_delta_epi32(v,_sv_),cv); _sv_ = v #define IP16(ip, i, _iv_) _iv_ #define IP32(ip, i, _iv_) _iv_ -unsigned char *bitd1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(1), v; BITPACK128V16(in, b, out, sv); return pout; -} -unsigned char *bitd1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1); BITPACK128V32(in, b, out, sv); return pout; +unsigned char *bitd1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi16(start), + cv = _mm_set1_epi16(1), v; + BITPACK128V16(in, b, out, sv); + return pout; } +unsigned char *bitd1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi32(start), v, + cv = _mm_set1_epi32(1); + BITPACK128V32(in, b, out, sv); + return pout; +} + +//-- bitpack sub ------- #define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi16(SUBI16x8(v,_sv_),cv); _sv_ = v #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi32(SUBI32x4(v,_sv_),cv); _sv_ = v #define IP16(ip, i, _iv_) _iv_ #define IP32(ip, i, _iv_) _iv_ -unsigned char *bits1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(8); BITPACK128V16(in, b, out, sv); return pout; +unsigned char *bits1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi16(start), v, + cv = _mm_set1_epi16(8); + BITPACK128V16(in, b, out, sv); + return pout; } -unsigned char *bits1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(4); BITPACK128V32(in, b, out, sv); return pout; +unsigned char *bits1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi32(start), v, + cv = _mm_set1_epi32(4); + BITPACK128V32(in, b, out, sv); + return pout; } +//-- bitpack FOR 1 ----- #define VI16(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi16(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi16(_sv_,cv); #define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi32(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi32(_sv_,cv); #define IP16(ip, i, _iv_) _iv_ #define IP32(ip, i, _iv_) _iv_ -unsigned char *bitf1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set_epi16(start+8,start+7,start+6,start+5,start+4,start+3,start+2,start+1), cv = _mm_set1_epi16(8); BITPACK128V16(in, b, out, sv); return pout; +unsigned char *bitf1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set_epi16(start+8,start+7,start+6,start+5,start+4,start+3,start+2,start+1), v, + cv = _mm_set1_epi16(8); + BITPACK128V16(in, b, out, sv); + return pout; } -unsigned char *bitf1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set_epi32( start+4,start+3,start+2,start+1), cv = _mm_set1_epi32(4); BITPACK128V32(in, b, out, sv); return pout; +unsigned char *bitf1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set_epi32( start+4,start+3,start+2,start+1), v, + cv = _mm_set1_epi32(4); BITPACK128V32(in, b, out, sv); + return pout; } +//-- bitpack zigzag ------- #define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = mm_delta_epi16(v,_sv_); _sv_ = v; _iv_ = mm_zzage_epi16(_iv_) -unsigned char *bitzpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(1); BITPACK128V16(in, b, out, sv); return pout; +unsigned char *bitzpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi16(start), v, + cv = _mm_set1_epi16(1); + BITPACK128V16(in, b, out, sv); + return pout; } #define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = mm_delta_epi32(v,_sv_); _sv_ = v; _iv_ = mm_zzage_epi32(_iv_) -unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b); - __m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1); BITPACK128V32(in, b, out, sv); return pout; +unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi32(start), v, + cv = _mm_set1_epi32(1); + BITPACK128V32(in, b, out, sv); + return pout; } +//-- bitpack xor ------- +#define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = mm_xore_epi16(v,_sv_); _sv_ = v; +unsigned char *bitxpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi16(start), v; + BITPACK128V16(in, b, out, sv); + return pout; +} +#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = mm_xore_epi32(v,_sv_); _sv_ = v; +unsigned char *bitxpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { + unsigned char *pout = out+PAD8(128*b); + __m128i sv = _mm_set1_epi32(start), v; + BITPACK128V32(in, b, out, sv); + return pout; +} + +//---------------------------- binpack -------------------------------------------------------------------------------------------------------------------------- size_t bitnpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip; _BITNPACKV( in, n, out, 128, 16, bitpack128v); } size_t bitnpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; _BITNPACKV( in, n, out, 128, 32, bitpack128v); } size_t bitnpack128v64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip; _BITNPACKV( in, n, out, 128, 64, bitpack128v); } @@ -419,9 +552,12 @@ size_t bitns1pack128v32(uint32_t *__restrict in, size_t n, unsigned char *__rest size_t bitnzpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitz, bitzpack128v, bitz, bitzpack); } size_t bitnzpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitz, bitzpack128v, bitz, bitzpack); } +size_t bitnxpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitx, bitxpack128v, bitx, bitxpack); } +size_t bitnxpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitx, bitxpack128v, bitx, bitxpack); } + size_t bitnfpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACKV(in, n, out, 128, 16, bitf, bitfpack128v, bitf, bitfpack); } size_t bitnfpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACKV(in, n, out, 128, 32, bitf, bitfpack128v, bitf, bitfpack); } #endif // SSE -#endif // Plain +//#endif // Plain #pragma clang diagnostic pop