TurboPFor: Bit Packing
This commit is contained in:
225
bitpack.c
225
bitpack.c
@ -65,84 +65,102 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
|
||||
#define IPI(_ip_) _ip_ += 32
|
||||
#endif
|
||||
|
||||
#define IPB(_ip_,_x_, _parm_)
|
||||
#define IP9(_ip_,_x_, _parm_)
|
||||
#define IPW(_ip_,_x_) VX
|
||||
#define IPX(_ip_,_x_) (V = IP(_ip_,_x_))
|
||||
|
||||
#define IPV(_ip_,_x_) IP(_ip_,_x_)
|
||||
#define IPP(_ip_,_x_, _parm_)
|
||||
#define IP16(_ip_,_x_, _parm_)
|
||||
#define IP32(_ip_,_x_, _parm_)
|
||||
#define IP64(_ip_,_x_, _parm_)
|
||||
#define _BITPACK_ bitpack
|
||||
#include "bitpack_.h"
|
||||
#undef IPB
|
||||
#undef IP9
|
||||
#undef IPV
|
||||
#undef IPX
|
||||
#undef IPP
|
||||
#undef IP16
|
||||
#undef IP32
|
||||
|
||||
#define DELTA
|
||||
|
||||
#define IPB(_ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start; start = IP(_ip_,_x_)
|
||||
#define IP9(_ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start; start = IP(_ip_,_x_)
|
||||
#define IPV(_ip_,_x_) VX
|
||||
#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start)
|
||||
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define _BITPACK_ bitdpack
|
||||
#include "bitpack_.h"
|
||||
#undef IPB
|
||||
#undef IP9
|
||||
#undef IPV
|
||||
#undef IPX
|
||||
#undef IPP
|
||||
#undef IP16
|
||||
#undef IP32
|
||||
|
||||
#define IPB(_ip_,_x_, _parm_)
|
||||
#define IP9(_ip_,_x_, _parm_)
|
||||
#define IPV(_ip_,_x_) IP(_ip_,_x_) - start
|
||||
#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start)
|
||||
#define IPP(_ip_,_x_, _parm_)
|
||||
#define IP16(_ip_,_x_, _parm_)
|
||||
#define IP32(_ip_,_x_, _parm_)
|
||||
#define IP64(_ip_,_x_, _parm_)
|
||||
#define _BITPACK_ bitfpack
|
||||
#include "bitpack_.h"
|
||||
#undef IPB
|
||||
#undef IP9
|
||||
#undef IPV
|
||||
#undef IPX
|
||||
#undef IPP
|
||||
#undef IP16
|
||||
#undef IP32
|
||||
|
||||
#define IPB( _ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start - 1; start = IP(_ip_,_x_)
|
||||
#define IP9( _ip_,_x_, _parm_) V = IP0(_ip_,_x_) - start - 1; start = IP(_ip_,_x_)
|
||||
#define IPV( _ip_,_x_) VX
|
||||
#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start - 1)
|
||||
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define _BITPACK_ bitd1pack
|
||||
#include "bitpack_.h"
|
||||
#undef IPB
|
||||
#undef IP9
|
||||
#undef IPV
|
||||
#undef IPX
|
||||
#undef IPP
|
||||
#undef IP16
|
||||
#undef IP32
|
||||
|
||||
/*#define IPB( _ip_,_x_, _parm_) v = IP(_ip_,_x_) - start - mdelta; start = IP(_ip_,_x_)
|
||||
/*#define IP9( _ip_,_x_, _parm_) v = IP(_ip_,_x_) - start - mdelta; start = IP(_ip_,_x_)
|
||||
#define IPV( _ip_,_x_) v
|
||||
#define IPX(_ip_,_x_) (v = IP(_ip_,_x_) - start - mdelta)
|
||||
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define _BITPACK_ bitepack
|
||||
#include "bitpack_.h"*/
|
||||
|
||||
#define IPB(_ip_,_x_, _parm_) V = TEMPLATE2(zigzagenc, USIZE)(IP(_ip_,_x_) - start); start = IP(_ip_,_x_)
|
||||
#define IP9(_ip_,_x_, _parm_) V = TEMPLATE2(zigzagenc, USIZE)(IP(_ip_,_x_) - start); start = IP(_ip_,_x_)
|
||||
#define IPV(_ip_,_x_) VX
|
||||
#define IPX(_ip_,_x_) (V = TEMPLATE2(zigzagenc, USIZE)(IP(_ip_,_x_) - start))
|
||||
#define IPP(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP16(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP32(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define IP64(_ip_,_x_, _parm_) start = IP(_ip_,_x_)
|
||||
#define _BITPACK_ bitzpack
|
||||
#include "bitpack_.h"
|
||||
#undef IPB
|
||||
#undef IP9
|
||||
#undef IPV
|
||||
#undef IPX
|
||||
#undef IPP
|
||||
#undef IP16
|
||||
#undef IP32
|
||||
|
||||
#define IPI(_ip_) _ip_ += 32; start += 32
|
||||
#define IPB(_ip_,_x_, _parm_)
|
||||
#define IP9(_ip_,_x_, _parm_)
|
||||
#define IPV(_ip_,_x_) (IP(_ip_,_x_) - start - (_x_) - 1)
|
||||
#define IPX(_ip_,_x_) (V = IP(_ip_,_x_) - start - (_x_) - 1)
|
||||
#define IPP(_ip_,_x_, _parm_)
|
||||
#define IP16(_ip_,_x_, _parm_)
|
||||
#define IP32(_ip_,_x_, _parm_)
|
||||
#define IP64(_ip_,_x_, _parm_)
|
||||
#define _BITPACK_ bitf1pack
|
||||
#include "bitpack_.h"
|
||||
#undef IPI
|
||||
#undef IPB
|
||||
#undef IP9
|
||||
#undef IPV
|
||||
#undef IPX
|
||||
#undef IPP
|
||||
#undef IP16
|
||||
#undef IP32
|
||||
|
||||
#define BITNPACK(in, n, out, csize, usize) { unsigned char *op = out;\
|
||||
for(ip = in, in += n; ip < in;) { \
|
||||
@ -171,25 +189,26 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
|
||||
return op - out;\
|
||||
}
|
||||
|
||||
size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNPACK(in, n, out, 128, 8); }
|
||||
size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNPACK(in, n, out, 128, 16); }
|
||||
size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNPACK(in, n, out, 128, 32); }
|
||||
size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNPACK(in, n, out, 128, 8); }
|
||||
size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNPACK(in, n, out, 128, 16); }
|
||||
size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNPACK(in, n, out, 128, 32); }
|
||||
size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNPACK(in, n, out, 128, 64); }
|
||||
|
||||
size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitd, bitdpacka); }
|
||||
size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitd, bitdpacka); }
|
||||
size_t bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitd, bitdpacka); }
|
||||
size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitd, bitdpacka); }
|
||||
size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitd, bitdpacka); }
|
||||
size_t bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitd, bitdpacka); }
|
||||
size_t bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitd, bitdpacka); }
|
||||
|
||||
size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitd1, bitd1packa); }
|
||||
size_t bitnd1pack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitd1, bitd1packa); }
|
||||
size_t bitnd1pack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitd1, bitd1packa); }
|
||||
size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitd1, bitd1packa); }
|
||||
size_t bitnd1pack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitd1, bitd1packa); }
|
||||
size_t bitnd1pack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitd1, bitd1packa); }
|
||||
size_t bitnd1pack64(uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitd1, bitd1packa); }
|
||||
|
||||
size_t bitnzpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitz, bitzpacka); }
|
||||
size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitz, bitzpacka); }
|
||||
size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitz, bitzpacka); }
|
||||
size_t bitnzpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitz, bitzpacka); }
|
||||
size_t bitnzpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitz, bitzpacka); }
|
||||
size_t bitnzpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitz, bitzpacka); }
|
||||
size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitz, bitzpacka); }
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) && defined(SSE2_ON)
|
||||
@ -198,51 +217,101 @@ size_t bitnzpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict
|
||||
#define OPPE(__op)
|
||||
#define IPPE(__op)
|
||||
|
||||
#define VSTI(ip, i, iv, parm)
|
||||
#define IPP(ip, i, iv) _mm_loadu_si128(ip++)
|
||||
#define VI16(ip, i, iv, parm)
|
||||
#define VI32(ip, i, iv, parm)
|
||||
#define IP16(ip, i, iv) _mm_loadu_si128(ip++)
|
||||
#define IP32(ip, i, iv) _mm_loadu_si128(ip++)
|
||||
#include "bitpack_.h"
|
||||
unsigned char *bitpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V16(in, b, out, 0); return pout; }
|
||||
unsigned char *bitpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return pout; }
|
||||
unsigned char *bitpack256w32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *_out=out; unsigned *_in=in;
|
||||
BITPACK128V32(in, b, out, 0); in = _in+128; out = _out+PAD8(128*b); BITPACK128V32(in, b, out, 0); return _out+PAD8(256*b); }
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v
|
||||
#define IPP(ip, i, _iv_) _iv_
|
||||
#define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x16(v,_sv_); _sv_ = v
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v
|
||||
#define IP16(ip, i, _iv_) _iv_
|
||||
#define IP32(ip, i, _iv_) _iv_
|
||||
#include "bitpack_.h"
|
||||
unsigned char *bitdpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v,sv = _mm_set1_epi32(start);
|
||||
BITPACK128V32(in, b, out, sv);
|
||||
return pout;
|
||||
unsigned char *bitdpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v,sv = _mm_set1_epi16(start); BITPACK128V16(in, b, out, sv); return pout;
|
||||
}
|
||||
unsigned char *bitdpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v,sv = _mm_set1_epi32(start); BITPACK128V32(in, b, out, sv); return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_)
|
||||
#define IPP(_ip_, i, _iv_) _mm_sub_epi32(_mm_loadu_si128(_ip_++),sv)
|
||||
#define VI16(_ip_, _i_, _iv_, _sv_)
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_)
|
||||
#define IP16(_ip_, i, _iv_) _mm_sub_epi16(_mm_loadu_si128(_ip_++),sv)
|
||||
#define IP32(_ip_, i, _iv_) _mm_sub_epi32(_mm_loadu_si128(_ip_++),sv)
|
||||
#include "bitpack_.h"
|
||||
unsigned char *bitfpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi32(start);
|
||||
BITPACK128V32(in, b, out, sv);
|
||||
return pout;
|
||||
unsigned char *bitfpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi16(start); BITPACK128V16(in, b, out, sv); return pout;
|
||||
}
|
||||
unsigned char *bitfpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi32(start); BITPACK128V32(in, b, out, sv); return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi32(DELTA128x32(v,_sv_),cv); _sv_ = v
|
||||
#define IPP(ip, i, _iv_) _iv_
|
||||
#define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi16(DELTA128x16(v,_sv_),cv); _sv_ = v
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = _mm_sub_epi32(DELTA128x32(v,_sv_),cv); _sv_ = v
|
||||
#define IP16(ip, i, _iv_) _iv_
|
||||
#define IP32(ip, i, _iv_) _iv_
|
||||
unsigned char *bitd1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(1); BITPACK128V16(in, b, out, sv); return pout;
|
||||
}
|
||||
unsigned char *bitd1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1);
|
||||
BITPACK128V32(in, b, out, sv); return pout;
|
||||
__m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1); BITPACK128V32(in, b, out, sv); return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi32(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi32(_sv_,cv);
|
||||
#define IPP(ip, i, _iv_) _iv_
|
||||
unsigned char *bitf1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set_epi32(start+4,start+3,start+2,start+1), cv = _mm_set1_epi32(4);
|
||||
BITPACK128V32(in, b, out, sv); return pout;
|
||||
#define VI16(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi16(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi16(_sv_,cv);
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm_sub_epi32(_mm_loadu_si128(_ip_++),_sv_); _sv_ = _mm_add_epi32(_sv_,cv);
|
||||
#define IP16(ip, i, _iv_) _iv_
|
||||
#define IP32(ip, i, _iv_) _iv_
|
||||
unsigned char *bitf1pack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set_epi16(start+8,start+7,start+6,start+5,start+4,start+3,start+2,start+1), cv = _mm_set1_epi16(8); BITPACK128V16(in, b, out, sv); return pout;
|
||||
}
|
||||
unsigned char *bitf1pack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set_epi32( start+4,start+3,start+2,start+1), cv = _mm_set1_epi32(4); BITPACK128V32(in, b, out, sv); return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v; _iv_ = ZIGZAG128x32(_iv_)
|
||||
unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1);
|
||||
BITPACK128V32(in, b, out, sv);
|
||||
return pout;
|
||||
#define VI16(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x16(v,_sv_); _sv_ = v; _iv_ = ZIGZAG128x16(_iv_)
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm_loadu_si128(_ip_++); _iv_ = DELTA128x32(v,_sv_); _sv_ = v; _iv_ = ZIGZAG128x32(_iv_)
|
||||
unsigned char *bitzpack128v16(unsigned short *__restrict in, unsigned n, unsigned char *__restrict out, unsigned short start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi16(start), cv = _mm_set1_epi16(1); BITPACK128V16(in, b, out, sv); return pout;
|
||||
}
|
||||
unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(128*b);
|
||||
__m128i v, sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1); BITPACK128V32(in, b, out, sv); return pout;
|
||||
}
|
||||
|
||||
#define _BITNPACK128V(in, n, out, csize, usize) {\
|
||||
unsigned char *op = out;\
|
||||
for(ip = in; ip != in + (n&~(csize-1)); ip += csize) { PREFETCH(ip+512);\
|
||||
unsigned b; TEMPLATE2(BITSIZE,usize)(ip, csize, b); *op++ = b; op = TEMPLATE2(bitpack128v, usize)(ip, csize, op, b); \
|
||||
} if(n&=(csize-1)) { unsigned b; TEMPLATE2(BITSIZE,usize)(ip, n, b); *op++ = b; op = TEMPLATE2(bitpack, usize)(ip, n, op, b); }\
|
||||
return op - out;\
|
||||
}
|
||||
|
||||
#define _BITNDPACK128V(in, n, out, csize, usize, _bitd_, _bitpackv_, _bitpack_) { if(!n) return 0;\
|
||||
unsigned char *op = out; \
|
||||
start = *in++; \
|
||||
TEMPLATE2(vbxput, usize)(op, start);\
|
||||
for(ip = in; ip != in + (n&~(csize-1)); ip += csize, start = ip[-1]) { PREFETCH(ip+512);\
|
||||
unsigned b = TEMPLATE2(_bitd_, usize)(ip, csize, start); *op++ = b; op = TEMPLATE2(_bitpackv_, usize)(ip, csize, op, start, b);\
|
||||
} if(n&=(csize-1)) { unsigned b = TEMPLATE2(_bitd_, usize)(ip, n, start); *op++ = b; op = TEMPLATE2(_bitpack_, usize)(ip, n, op, start, b); }\
|
||||
return op - out;\
|
||||
}
|
||||
|
||||
size_t bitnpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNPACK128V( in, n, out, 128, 16); }
|
||||
size_t bitnpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNPACK128V( in, n, out, 128, 32); }
|
||||
|
||||
size_t bitndpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACK128V(in, n, out, 128, 16, bitd, bitdpack128v, bitdpack); }
|
||||
size_t bitndpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACK128V(in, n, out, 128, 32, bitd, bitdpack128v, bitdpack); }
|
||||
|
||||
size_t bitnd1pack128v16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACK128V(in, n, out, 128, 16, bitd1, bitd1pack128v, bitd1pack); }
|
||||
size_t bitnd1pack128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACK128V(in, n, out, 128, 32, bitd1, bitd1pack128v, bitd1pack); }
|
||||
|
||||
size_t bitnzpack128v16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; _BITNDPACK128V(in, n, out, 128, 16, bitz, bitzpack128v, bitzpack); }
|
||||
size_t bitnzpack128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; _BITNDPACK128V(in, n, out, 128, 32, bitz, bitzpack128v, bitzpack); }
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__) && defined(AVX2_ON)
|
||||
@ -255,17 +324,17 @@ unsigned char *bitzpack128v32(unsigned *__restrict in, unsigned n, unsigne
|
||||
#define OPPE(__op)
|
||||
#define IPPE(__op)
|
||||
|
||||
#define VSTI(ip, i, iv, parm)
|
||||
#define IPP(ip, i, iv) _mm256_loadu_si256(ip++)
|
||||
#define VI32(ip, i, iv, parm)
|
||||
#define IP32(ip, i, iv) _mm256_loadu_si256(ip++)
|
||||
#include "bitpack_.h"
|
||||
|
||||
unsigned char *bitpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b) { unsigned char *pout = out+PAD8(256*b); BITPACK256V32(in, b, out, 0); return pout; }
|
||||
#undef VSTI
|
||||
#undef IPP
|
||||
#undef VI32
|
||||
#undef IP32
|
||||
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) _iv_ = _mm256_sub_epi32(_mm256_loadu_si256(_ip_++),sv)
|
||||
#define IPP(_ip_, i, _iv_) _iv_
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm256_sub_epi32(_mm256_loadu_si256(_ip_++),sv)
|
||||
#define IP32(_ip_, i, _iv_) _iv_
|
||||
#include "bitpack_.h"
|
||||
unsigned char *bitfpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b);
|
||||
__m256i v, sv = _mm256_set1_epi32(start);
|
||||
@ -273,15 +342,15 @@ unsigned char *bitfpack256v32(unsigned *__restrict in, unsigned n, unsigne
|
||||
return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) _iv_ = _mm256_sub_epi32(_mm256_loadu_si256(_ip_++),_sv_); _sv_ = _mm256_add_epi32(_sv_,cv);
|
||||
#define IPP(ip, i, _iv_) _iv_
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) _iv_ = _mm256_sub_epi32(_mm256_loadu_si256(_ip_++),_sv_); _sv_ = _mm256_add_epi32(_sv_,cv);
|
||||
#define IP32(ip, i, _iv_) _iv_
|
||||
unsigned char *bitf1pack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b);
|
||||
__m256i v, sv = _mm256_set_epi32(start+8,start+7,start+6,start+5,start+4,start+3,start+2,start+1), cv = _mm256_set1_epi32(8);
|
||||
BITPACK256V32(in, b, out, sv); return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = DELTA256x32(v,_sv_); _sv_ = v
|
||||
#define IPP(ip, i, _iv_) _iv_
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = DELTA256x32(v,_sv_); _sv_ = v
|
||||
#define IP32(ip, i, _iv_) _iv_
|
||||
#include "bitpack_.h"
|
||||
unsigned char *bitdpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b);
|
||||
__m256i v,sv = _mm256_set1_epi32(start);
|
||||
@ -289,14 +358,14 @@ unsigned char *bitdpack256v32(unsigned *__restrict in, unsigned n, unsigne
|
||||
return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = _mm256_sub_epi32(DELTA256x32(v,_sv_),cv); _sv_ = v
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = _mm256_sub_epi32(DELTA256x32(v,_sv_),cv); _sv_ = v
|
||||
unsigned char *bitd1pack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b);
|
||||
__m256i v, sv = _mm256_set1_epi32(start), cv = _mm256_set1_epi32(1);
|
||||
BITPACK256V32(in, b, out, sv);
|
||||
return pout;
|
||||
}
|
||||
|
||||
#define VSTI(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = DELTA256x32(v,_sv_); _sv_ = v; _iv_ = ZIGZAG256x32(_iv_)
|
||||
#define VI32(_ip_, _i_, _iv_, _sv_) v = _mm256_loadu_si256(_ip_++); _iv_ = DELTA256x32(v,_sv_); _sv_ = v; _iv_ = ZIGZAG256x32(_iv_)
|
||||
unsigned char *bitzpack256v32(unsigned *__restrict in, unsigned n, unsigned char *__restrict out, unsigned start, unsigned b) { unsigned char *pout = out+PAD8(256*b);
|
||||
__m256i v, sv = _mm256_set1_epi32(start), cv = _mm256_set1_epi32(1);
|
||||
BITPACK256V32(in, b, out, sv);
|
||||
|
Reference in New Issue
Block a user