diff --git a/bitpack.c b/bitpack.c index d337123..4218409 100644 --- a/bitpack.c +++ b/bitpack.c @@ -44,6 +44,7 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define PREFETCH(_ip_) __builtin_prefetch(_ip_+768,0)//#define PREFETCH(ip) +#if !defined(SSE2_ON) && !defined(AVX2_ON) #if 0 #define IP0(_ip_,_x_) *_ip_ #define IP( _ip_,_x_) *_ip_++ @@ -62,6 +63,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define IPP(_ip_,_x_, _parm_) #define _BITPACK_ bitpack #include "bitpack_.h" +#undef IPB +#undef IPV +#undef IPX +#undef IPP #define DELTA @@ -71,6 +76,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define _BITPACK_ bitdpack #include "bitpack_.h" +#undef IPB +#undef IPV +#undef IPX +#undef IPP #define IPB(_ip_,_x_, _parm_) #define IPV(_ip_,_x_) IP(_ip_,_x_) - start @@ -78,6 +87,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define IPP(_ip_,_x_, _parm_) #define _BITPACK_ bitfpack #include "bitpack_.h" +#undef IPB +#undef IPV +#undef IPX +#undef IPP #define IPB( _ip_,_x_, _parm_) v = IP0(_ip_,_x_) - start - 1; start = IP(_ip_,_x_) #define IPV( _ip_,_x_) v @@ -85,6 +98,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define _BITPACK_ bitd1pack #include "bitpack_.h" +#undef IPB +#undef IPV +#undef IPX +#undef IPP /*#define IPB( _ip_,_x_, _parm_) v = IP(_ip_,_x_) - start - mdelta; start = IP(_ip_,_x_) #define IPV( _ip_,_x_) v @@ -99,6 +116,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_) #define _BITPACK_ bitzpack #include "bitpack_.h" +#undef IPB +#undef IPV +#undef IPX +#undef IPP #define IPI(_ip_) _ip_ += 32; start += 32 #define IPB(_ip_,_x_, _parm_) @@ -108,12 +129,16 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons #define _BITPACK_ bitf1pack #include "bitpack_.h" #undef IPI +#undef IPB +#undef IPV +#undef IPX +#undef IPP #define BITNPACK(in, n, out, csize, usize) { unsigned char *op = out;\ for(ip = in, in += n; ip < in;) { \ unsigned iplen = in - ip,b;\ if(iplen > csize) iplen = csize; PREFETCH(ip+512);\ - TEMPLATE2(BITSIZE,usize)(ip, iplen, b);\ + TEMPLATE2(BITSIZE,usize)(ip, iplen, b); /*if(b!=usize) printf("$%d ", b);*/\ *op++ = b; \ op = TEMPLATE2(bitpacka, usize)[b](ip, csize, op); \ ip += iplen;\ @@ -157,9 +182,9 @@ size_t bitnzpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitz, bitzpacka); } size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitz, bitzpacka); } size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitz, bitzpacka); } +#endif -//---------------------------------------------------------------------------------------------------------------------- -#ifdef __SSE2__ +#if defined(__SSE2__) && defined(SSE2_ON) #include #define OPPE(__op) @@ -169,6 +194,8 @@ size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict #define IPP(ip, i, iv) _mm_loadu_si128(ip++) #include "bitpack_.h" unsigned char *bitpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; } +unsigned char *bitpack256w32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *_out=out; unsigned *_in=in; +BITPACK128V32(in, b, out, 0); in = _in+128; out = _out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return _out+PAD8(256*b); } #define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v #define IPP(ip, i, _iv_) _iv_ @@ -210,7 +237,7 @@ unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigne } #endif -#ifdef __AVX2__ +#if defined(__AVX2__) && defined(AVX2_ON) #include #define OPPE(__op)