TurboPFor: TurboPFor encode

This commit is contained in:
x
2017-01-06 10:35:14 +01:00
parent 48d058ef34
commit 8bf15f8bdc

59
vp4c.c
View File

@ -37,7 +37,6 @@
//-- Scalar
#define _P4ENC _p4enc
#define P4ENC p4enc
//#define P4NENC p4dnenc
#define BITPACK bitpack
#define USIZE 16
@ -73,7 +72,6 @@
#define VSIZE 128
#define _P4ENC _p4enc128v
#define P4ENC p4enc128v
//#define P4NENC p4dnencv
#define BITPACK bitpack128v
#define USIZE 32
#include "vp4c.c"
@ -106,8 +104,10 @@
#define VSC(a)
#ifdef _P4BITS
unsigned TEMPLATE2(_P4BITS, USIZE)(uint_t *__restrict in, unsigned n, unsigned *pbx) {
uint_t *ip; int b=0,r; int i,ml,l;
unsigned x, bx, cnt[USIZE+1] = {0}, _vb[USIZE*2+5] = {0}, *vb=&_vb[USIZE],fx=0, b64=(n+7)/8;
uint_t *ip, b=0;
int r,i,ml,l;
unsigned x, bx, cnt[USIZE+1] = {0}, _vb[USIZE*2+5] = {0}, *vb=&_vb[USIZE],fx=0, bmap8=(n+7)/8;
#if EXCEP >= 2
unsigned long long smap[USIZE+1][P4D_MAX/64]={0},xmap[P4D_MAX/64]={0}; unsigned c;
#endif
@ -126,28 +126,35 @@ unsigned TEMPLATE2(_P4BITS, USIZE)(uint_t *__restrict in, unsigned n, unsigned *
#elif EXCEP == 2
for(c = 0; c < (n+63)/64;c++) xmap[c] = smap[b][c];
#else
ml -= 2+b64;
ml -= 2+bmap8;
#endif
for(i = b-1; i >= 0; --i) {
#if EXCEP == 1
l = PAD8(n*i) + 2+b64 + PAD8(x*(bx-i));
#if EXCEP == 1 // Variable byte
l = PAD8(n*i) + 2+bmap8 + PAD8(x*(bx-i));
int v = PAD8(n*i) + 2 + x + vv, vx = 0;
x += cnt[i]; vv+=cnt[i]+vb[i]; VBB(cnt[i],i);
if(v < l) l=v,vx=1; if(unlikely(l < ml)) ml=l,b=i,fx=vx;
x += cnt[i]; vv+=cnt[i]+vb[i];
VBB(cnt[i],i);
if(v < l)
l=v,vx=1;
if(unlikely(l < ml))
ml=l,b=i,fx=vx;
#elif EXCEP == 2
uint_t tin[P4D_MAX],*tp=tin;
for(ip=in,c = 0; c < (n+63)/64;c++,ip+=64) {
unsigned long long z = xmap[c];
while(z) { unsigned x = ctz64(z); *tp++ = ip[x]; z ^= (1ull<<x); }
}
unsigned char out[2048],*op = TEMPLATE2(vsenc, USIZE)(tin, n, out); unsigned vv = op - out;
l = PAD8(n*i) + PAD8(x*(bx-i))+2+b64;
unsigned char out[2048],*op = TEMPLATE2(vsenc, USIZE)(tin, n, out);
unsigned vv = op - out;
l = PAD8(n*i) + PAD8(x*(bx-i))+2+bmap8;
int v = PAD8(n*i) + 2 + x + vv, vx = 0;
if(v < l) l=v,vx=1; if(unlikely(l < ml)) ml=l,b=i,fx=vx;
x += cnt[i];
for(c = 0; c < (n+63)/64;c++) xmap[c] |= smap[i][c];
#else
l = PAD8(n*i) + PAD8(x*(bx-i)); x += cnt[i]; unlikely(l < ml)?(ml=l,b=i):(ml=ml,b=b);
#else // fixed bitpack
l = PAD8(n*i) + PAD8(x*(bx-i));
x += cnt[i];
unlikely(l < ml)?(ml=l,b=i):(ml=ml,b=b);
#endif
} //fx = 0;
#if EXCEP > 0
@ -166,6 +173,7 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig
uint_t msk = (1ull << b)-1,_in[P4D_MAX], inx[P4D_MAX*2];
unsigned long long xmap[P4D_MAX/64] = { 0 };
unsigned miss[P4D_MAX],i, xn, c;
#define MISS { miss[xn] = i; xn += in[i] > msk; _in[i] = in[i] & msk; i++; }
for(xn = i = 0; i != n&~3; ) { MISS; MISS; MISS; MISS; }
while(i != n) MISS;
@ -188,7 +196,8 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig
*out++ = xn;
out = TEMPLATE2(BITPACK, USIZE)(_in, n, out, b);
out = TEMPLATE2(vbenc, USIZE)(inx, xn, out);
for(i = 0; i != xn; ++i) *out++ = miss[i];
for(i = 0; i != xn; ++i)
*out++ = miss[i];
}
#endif
return out;
@ -197,18 +206,28 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig
unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) {
unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(in, n, &bx);
#if EXCEP > 0
if(bx <= USIZE) { P4SAVE(out, b, bx); } else *out++= 0x80|b<<1;
if(bx <= USIZE) {
P4SAVE(out, b, bx);
} else *out++= 0x80|b<<1;
#else
P4SAVE(out, b, bx);
#endif
return TEMPLATE2(_P4ENC, USIZE)(in, n, out, b, bx);
}
/*unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) {
uint_t *ip;
for(ip = in; ip < in+n; ip += P4D_MAX) { unsigned l = (in+n) - ip; l = min(l, P4D_MAX);
out = TEMPLATE2(P4ENC, USIZE)(ip, l, out);;
}
/*#ifdef VSIZE
#define CSIZE VSIZE
#else
#define CSIZE 128
#endif
unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) {
uint_t *ip;
for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
out = TEMPLATE2(P4ENC, USIZE)(ip, CSIZE, out);
}
return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), out);
}*/
#pragma clang diagnostic pop
#endif