TurboPFor: Bit Packing

This commit is contained in:
powturbo
2017-06-28 17:18:43 +02:00
parent dd01de287a
commit ff0061bd32

View File

@ -44,6 +44,7 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define PREFETCH(_ip_) __builtin_prefetch(_ip_+768,0)//#define PREFETCH(ip)
#if !defined(SSE2_ON) && !defined(AVX2_ON)
#if 0
#define IP0(_ip_,_x_) *_ip_
#define IP( _ip_,_x_) *_ip_++
@ -62,6 +63,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define IPP(_ip_,_x_, _parm_)
#define _BITPACK_ bitpack
#include "bitpack_.h"
#undef IPB
#undef IPV
#undef IPX
#undef IPP
#define DELTA
@ -71,6 +76,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
#define _BITPACK_ bitdpack
#include "bitpack_.h"
#undef IPB
#undef IPV
#undef IPX
#undef IPP
#define IPB(_ip_,_x_, _parm_)
#define IPV(_ip_,_x_) IP(_ip_,_x_) - start
@ -78,6 +87,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define IPP(_ip_,_x_, _parm_)
#define _BITPACK_ bitfpack
#include "bitpack_.h"
#undef IPB
#undef IPV
#undef IPX
#undef IPP
#define IPB( _ip_,_x_, _parm_) v = IP0(_ip_,_x_) - start - 1; start = IP(_ip_,_x_)
#define IPV( _ip_,_x_) v
@ -85,6 +98,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
#define _BITPACK_ bitd1pack
#include "bitpack_.h"
#undef IPB
#undef IPV
#undef IPX
#undef IPP
/*#define IPB( _ip_,_x_, _parm_) v = IP(_ip_,_x_) - start - mdelta; start = IP(_ip_,_x_)
#define IPV( _ip_,_x_) v
@ -99,6 +116,10 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
#define _BITPACK_ bitzpack
#include "bitpack_.h"
#undef IPB
#undef IPV
#undef IPX
#undef IPP
#define IPI(_ip_) _ip_ += 32; start += 32
#define IPB(_ip_,_x_, _parm_)
@ -108,12 +129,16 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#define _BITPACK_ bitf1pack
#include "bitpack_.h"
#undef IPI
#undef IPB
#undef IPV
#undef IPX
#undef IPP
#define BITNPACK(in, n, out, csize, usize) { unsigned char *op = out;\
for(ip = in, in += n; ip < in;) { \
unsigned iplen = in - ip,b;\
if(iplen > csize) iplen = csize; PREFETCH(ip+512);\
TEMPLATE2(BITSIZE,usize)(ip, iplen, b);\
TEMPLATE2(BITSIZE,usize)(ip, iplen, b); /*if(b!=usize) printf("$%d ", b);*/\
*op++ = b; \
op = TEMPLATE2(bitpacka, usize)[b](ip, csize, op); \
ip += iplen;\
@ -157,9 +182,9 @@ size_t bitnzpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict
size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitz, bitzpacka); }
size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitz, bitzpacka); }
size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitz, bitzpacka); }
#endif
//----------------------------------------------------------------------------------------------------------------------
#ifdef __SSE2__
#if defined(__SSE2__) && defined(SSE2_ON)
#include <emmintrin.h>
#define OPPE(__op)
@ -169,6 +194,8 @@ size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict
#define IPP(ip, i, iv) _mm_loadu_si128(ip++)
#include "bitpack_.h"
unsigned char *bitpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; }
unsigned char *bitpack256w32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *_out=out; unsigned *_in=in;
BITPACK128V32(in, b, out, 0); in = _in+128; out = _out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return _out+PAD8(256*b); }
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v
#define IPP(ip, i, _iv_) _iv_
@ -210,7 +237,7 @@ unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigne
}
#endif
#ifdef __AVX2__
#if defined(__AVX2__) && defined(AVX2_ON)
#include <immintrin.h>
#define OPPE(__op)