TurboPFor: TurboPFor encode

This commit is contained in:
powturbo
2017-06-13 09:14:19 +02:00
parent 603ee0b745
commit 29c44df968

51
vp4c.c
View File

@ -23,8 +23,6 @@
**/ **/
// "TurboPFor: Integer Compression" Turbo PFor/PforDelta // "TurboPFor: Integer Compression" Turbo PFor/PforDelta
#ifndef USIZE #ifndef USIZE
#include <stdint.h>
#include "conf.h" #include "conf.h"
#include "bitpack.h" #include "bitpack.h"
#include "vint.h" #include "vint.h"
@ -144,7 +142,6 @@
#define P4BITS _p4bits #define P4BITS _p4bits
#ifndef NSIMD #ifndef NSIMD
#define HYBRID 1 // #define HYBRID 1 //
#ifdef __SSE2__ #ifdef __SSE2__
//-- SIMD: Vertical bitpacking //-- SIMD: Vertical bitpacking
@ -239,35 +236,36 @@ unsigned TEMPLATE2(_P4BITS, USIZE)(uint_t *__restrict in, unsigned n, unsigned *
#endif #endif
unsigned cnt[USIZE+8] = {0}, x, bx, bmp8=(n+7)/8; unsigned cnt[USIZE+8] = {0}, x, bx, bmp8=(n+7)/8;
uint_t *ip, u=0; uint_t *ip, u=0;
int b,i,ml,l,fx=0; int b,i,ml,l,fx=0,vv;
#define CNTE(i) { ++cnt[TEMPLATE2(bsr, USIZE)(ip[i])], u |= ip[i]; } #define CNTE(i) { ++cnt[TEMPLATE2(bsr, USIZE)(ip[i])], u |= ip[i]; }
for(ip = in; ip != in+(n&~3); ip+=4) { CNTE(0); CNTE(1); CNTE(2); CNTE(3); } for(ip = in; ip != in+(n&~3); ip+=4) { CNTE(0); CNTE(1); CNTE(2); CNTE(3); }
for(;ip != in+n;ip++) CNTE(0); for(;ip != in+n;ip++) CNTE(0);
b = TEMPLATE2(bsr, USIZE)(u); //xbits[b/*ctz64(b)*/]++; b = TEMPLATE2(bsr, USIZE)(u); //if(u) xbits[ctz64(u)]++;
bx = b; bx = b;
ml = PAD8(n*b)+1; x = cnt[b]; ml = PAD8(n*b)+1; x = cnt[b];
#if HYBRID > 0 #if HYBRID > 0
#define VBB(_x_,_b_) vb[_b_-7]+=_x_; vb[_b_-15]+=_x_*2; vb[_b_-19]+=_x_*3; vb[_b_-25]+=_x_*4; #define VBB(_x_,_b_) vb[_b_-7]+=_x_; vb[_b_-15]+=_x_*2; vb[_b_-19]+=_x_*3; vb[_b_-25]+=_x_*4;
int vv = x; VBB(x,b); vv = x; VBB(x,b);
#else #else
ml -= 2+bmp8; ml -= 2+bmp8;
#endif #endif
for(i = b-1; i >= 0; --i) { for(i = b-1; i >= 0; --i) {
int fi,v;
#if HYBRID > 0 #if HYBRID > 0
v = PAD8(n*i) + 2 + x + vv;
l = PAD8(n*i) + 2+bmp8 + PAD8(x*(bx-i)); l = PAD8(n*i) + 2+bmp8 + PAD8(x*(bx-i));
int v = PAD8(n*i) + 2 + x + vv, vx = 0;
x += cnt[i]; x += cnt[i];
vv += cnt[i]+vb[i]; vv += cnt[i]+vb[i];
VBB(cnt[i],i); VBB(cnt[i],i);
int fi = l < ml; ml = fi?l:ml; b = fi?i:b; fx=fi?0:fx; fi = l < ml; ml = fi?l:ml; b = fi?i:b; fx=fi?0:fx;
fi = v < ml; ml = fi?v:ml; b = fi?i:b; fx=fi?1:fx; fi = v < ml; ml = fi?v:ml; b = fi?i:b; fx=fi?1:fx;
#else #else
l = PAD8(n*i) + PAD8(x*(bx-i)); l = PAD8(n*i) + PAD8(x*(bx-i));
x += cnt[i]; x += cnt[i];
int fi = l < ml; fi = l < ml;
ml = fi?l:ml; b = fi?i:b; ml = fi?l:ml; b = fi?i:b;
#endif #endif
} //fx = 0; } //fx = 0;
@ -284,12 +282,13 @@ unsigned TEMPLATE2(_P4BITS, USIZE)(uint_t *__restrict in, unsigned n, unsigned *
#endif #endif
unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b, unsigned bx) { unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, unsigned b, unsigned bx) {
if(!bx)
return TEMPLATE2(BITPACK, USIZE)(in, n, out, b);
uint_t msk = (1ull << b)-1, _in[P4D_MAX+32], inx[P4D_MAX+32]; uint_t msk = (1ull << b)-1, _in[P4D_MAX+32], inx[P4D_MAX+32];
unsigned long long xmap[P4D_MAX/64] = {0}; unsigned long long xmap[P4D_MAX/64] = {0};
unsigned miss[P4D_MAX],i, xn, c; unsigned miss[P4D_MAX],i, xn, c;
unsigned char *_out = out;
if(!bx)
return TEMPLATE2(BITPACK, USIZE)(in, n, out, b);
#define MISS { miss[xn] = i; xn += in[i] > msk; _in[i] = in[i] & msk; i++; } #define MISS { miss[xn] = i; xn += in[i] > msk; _in[i] = in[i] & msk; i++; }
for(xn = i = 0; i != n&~3; ) { MISS; MISS; MISS; MISS; } for(xn = i = 0; i != n&~3; ) { MISS; MISS; MISS; MISS; }
while(i != n) MISS; while(i != n) MISS;
@ -299,7 +298,6 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig
xmap[c>>6] |= (1ull << (c&0x3f)); xmap[c>>6] |= (1ull << (c&0x3f));
inx[i] = in[c] >> b; inx[i] = in[c] >> b;
} }
unsigned char *_out = out;
#if HYBRID > 0 #if HYBRID > 0
if(bx <= USIZE) { if(bx <= USIZE) {
@ -319,8 +317,9 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig
return out; return out;
} }
unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) { if(!n) return out; unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) { unsigned bx, b;
unsigned bx, b = TEMPLATE2(P4BITS, USIZE)(in, n, &bx); //if(bx <= USIZE) printf("%d,%d ", b, bx);else printf("#%d ", b); if(!n) return out;
b = TEMPLATE2(P4BITS, USIZE)(in, n, &bx); //if(bx <= USIZE) printf("%d,%d ", b, bx);else printf("#%d ", b);
#if HYBRID > 0 #if HYBRID > 0
TEMPLATE2(P4HVE, USIZE)(out,b,bx); TEMPLATE2(P4HVE, USIZE)(out,b,bx);
#else #else
@ -331,13 +330,13 @@ unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsign
} }
size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) { size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
unsigned char *op = out;
uint_t *ip;
if(!n) if(!n)
return 0; return 0;
unsigned char *op = out; for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { unsigned bx, b; __builtin_prefetch(ip+512);
uint_t *ip; b = TEMPLATE2(P4BITS, USIZE)(ip, CSIZE, &bx);
for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
unsigned bx, b = TEMPLATE2(P4BITS, USIZE)(ip, CSIZE, &bx);
#if HYBRID > 0 #if HYBRID > 0
TEMPLATE2(P4HVE, USIZE)(op,b,bx); TEMPLATE2(P4HVE, USIZE)(op,b,bx);
#else #else
@ -348,24 +347,23 @@ size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *
return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), op) - out; return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), op) - out;
} }
#else #else
ALWAYS_INLINE unsigned char *TEMPLATE2(P4DENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { if(!n) return out; unsigned char *TEMPLATE2(P4DENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) {
uint_t _in[P4D_MAX+8]; uint_t _in[P4D_MAX+8];
if(!n) return out;
TEMPLATE2(BITDELTA, USIZE)(in, n, _in, start, P4DELTA); TEMPLATE2(BITDELTA, USIZE)(in, n, _in, start, P4DELTA);
return TEMPLATE2(P4ENC, USIZE)(_in, n, out); return TEMPLATE2(P4ENC, USIZE)(_in, n, out);
} }
size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) { size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
unsigned char *op = out;
uint_t *ip, start = *in++;
if(!n) if(!n)
return out; return out;
unsigned char *op = out;
uint_t *ip, start = *in++;
TEMPLATE2(vbxput, USIZE)(op, start); TEMPLATE2(vbxput, USIZE)(op, start);
for(ip = in, --n; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512); for(ip = in, --n; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { uint_t _in[P4D_MAX+8];unsigned bx, b; __builtin_prefetch(ip+512);
uint_t _in[P4D_MAX+8];
TEMPLATE2(BITDELTA, USIZE)(ip, CSIZE, _in, start, P4DELTA); TEMPLATE2(BITDELTA, USIZE)(ip, CSIZE, _in, start, P4DELTA);
unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(_in, CSIZE, &bx); b = TEMPLATE2(_p4bits, USIZE)(_in, CSIZE, &bx);
#if HYBRID > 0 #if HYBRID > 0
TEMPLATE2(P4HVE, USIZE)(op,b,bx); TEMPLATE2(P4HVE, USIZE)(op,b,bx);
#else #else
@ -380,4 +378,3 @@ size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *
#pragma clang diagnostic pop #pragma clang diagnostic pop
#endif #endif