diff --git a/vp4c.c b/vp4c.c index 935d94d..0369392 100644 --- a/vp4c.c +++ b/vp4c.c @@ -37,7 +37,6 @@ //-- Scalar #define _P4ENC _p4enc #define P4ENC p4enc -//#define P4NENC p4dnenc #define BITPACK bitpack #define USIZE 16 @@ -73,7 +72,6 @@ #define VSIZE 128 #define _P4ENC _p4enc128v #define P4ENC p4enc128v -//#define P4NENC p4dnencv #define BITPACK bitpack128v #define USIZE 32 #include "vp4c.c" @@ -106,8 +104,10 @@ #define VSC(a) #ifdef _P4BITS unsigned TEMPLATE2(_P4BITS, USIZE)(uint_t *__restrict in, unsigned n, unsigned *pbx) { - uint_t *ip; int b=0,r; int i,ml,l; - unsigned x, bx, cnt[USIZE+1] = {0}, _vb[USIZE*2+5] = {0}, *vb=&_vb[USIZE],fx=0, b64=(n+7)/8; + uint_t *ip, b=0; + int r,i,ml,l; + unsigned x, bx, cnt[USIZE+1] = {0}, _vb[USIZE*2+5] = {0}, *vb=&_vb[USIZE],fx=0, bmap8=(n+7)/8; + #if EXCEP >= 2 unsigned long long smap[USIZE+1][P4D_MAX/64]={0},xmap[P4D_MAX/64]={0}; unsigned c; #endif @@ -126,28 +126,35 @@ unsigned TEMPLATE2(_P4BITS, USIZE)(uint_t *__restrict in, unsigned n, unsigned * #elif EXCEP == 2 for(c = 0; c < (n+63)/64;c++) xmap[c] = smap[b][c]; #else - ml -= 2+b64; + ml -= 2+bmap8; #endif for(i = b-1; i >= 0; --i) { - #if EXCEP == 1 - l = PAD8(n*i) + 2+b64 + PAD8(x*(bx-i)); + #if EXCEP == 1 // Variable byte + l = PAD8(n*i) + 2+bmap8 + PAD8(x*(bx-i)); int v = PAD8(n*i) + 2 + x + vv, vx = 0; - x += cnt[i]; vv+=cnt[i]+vb[i]; VBB(cnt[i],i); - if(v < l) l=v,vx=1; if(unlikely(l < ml)) ml=l,b=i,fx=vx; + x += cnt[i]; vv+=cnt[i]+vb[i]; + VBB(cnt[i],i); + if(v < l) + l=v,vx=1; + if(unlikely(l < ml)) + ml=l,b=i,fx=vx; #elif EXCEP == 2 uint_t tin[P4D_MAX],*tp=tin; for(ip=in,c = 0; c < (n+63)/64;c++,ip+=64) { unsigned long long z = xmap[c]; while(z) { unsigned x = ctz64(z); *tp++ = ip[x]; z ^= (1ull< 0 @@ -166,6 +173,7 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig uint_t msk = (1ull << b)-1,_in[P4D_MAX], inx[P4D_MAX*2]; unsigned long long xmap[P4D_MAX/64] = { 0 }; unsigned miss[P4D_MAX],i, xn, c; + #define MISS { miss[xn] = i; xn += in[i] > msk; _in[i] = in[i] & msk; i++; } for(xn = i = 0; i != n&~3; ) { MISS; MISS; MISS; MISS; } while(i != n) MISS; @@ -188,7 +196,8 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig *out++ = xn; out = TEMPLATE2(BITPACK, USIZE)(_in, n, out, b); out = TEMPLATE2(vbenc, USIZE)(inx, xn, out); - for(i = 0; i != xn; ++i) *out++ = miss[i]; + for(i = 0; i != xn; ++i) + *out++ = miss[i]; } #endif return out; @@ -197,18 +206,28 @@ unsigned char *TEMPLATE2(_P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsig unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) { unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(in, n, &bx); #if EXCEP > 0 - if(bx <= USIZE) { P4SAVE(out, b, bx); } else *out++= 0x80|b<<1; + if(bx <= USIZE) { + P4SAVE(out, b, bx); + } else *out++= 0x80|b<<1; #else P4SAVE(out, b, bx); #endif + return TEMPLATE2(_P4ENC, USIZE)(in, n, out, b, bx); } -/*unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) { - uint_t *ip; - for(ip = in; ip < in+n; ip += P4D_MAX) { unsigned l = (in+n) - ip; l = min(l, P4D_MAX); - out = TEMPLATE2(P4ENC, USIZE)(ip, l, out);; - } +/*#ifdef VSIZE + #define CSIZE VSIZE +#else + #define CSIZE 128 +#endif + +unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) { + uint_t *ip; + for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512); + out = TEMPLATE2(P4ENC, USIZE)(ip, CSIZE, out); + } + return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), out); }*/ #pragma clang diagnostic pop #endif