This commit is contained in:
powturbo
2017-01-16 19:27:48 +01:00
committed by GitHub
parent d98fc553a5
commit 53dd5af49e
9 changed files with 223 additions and 181 deletions

View File

@ -25,6 +25,7 @@
#include <stdio.h>
#include "bitpack.h"
#include "bitutil.h"
#include "vint.h"
#define PAD8(_x_) ( (((_x_)+8-1)/8) )
#pragma clang diagnostic push
@ -97,45 +98,50 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
#include "bitpack_.h"
#undef IPI
#define BITNPACK(in, n, out, csize, usize) { ip=in;\
/*if(usize <= 32)\
for(; ip < in+(n&~(csize*4-1)); ) { __builtin_prefetch(ip+512); unsigned char *p=ip; unsigned u,b;\
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u = b; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u |= b<<6; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u |= b<<12; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u |= b<<18; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
ctou32(p) = p[3]<<24 | u&0xffffff;\
}*/\
for(in+=n; ip < in;) { unsigned iplen = in - ip; if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
unsigned b; TEMPLATE2(BITSIZE,usize)(ip, csize, b); *out++ = b; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); \
#define BITNPACK(in, n, out, csize, usize) { unsigned char *op = out;\
for(ip = in, in += n; ip < in;) { \
unsigned iplen = in - ip,b;\
if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
TEMPLATE2(BITSIZE,usize)(ip, csize, b);\
*op++ = b; \
op = TEMPLATE2(bitpacka, usize)[b](ip, csize, op); \
ip += csize;\
} return out;\
} \
return op - out;\
}
#define BITNDPACK(in, n, out, csize, usize, _start_, _bitd_, _bitpacka_) {\
for(ip = in,in+=n; ip < in;) { unsigned iplen = in - ip; if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
#define BITNDPACK(in, n, out, csize, usize, _bitd_, _bitpacka_) {\
if(!n) return 0;\
unsigned char *op = out; \
start = *in++; \
TEMPLATE2(vbxput, usize)(op, start);\
\
for(ip = in,--n, in += n; ip < in;) { \
unsigned iplen = in - ip;\
if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
typeof(in[0]) _in[csize+8];\
unsigned b = TEMPLATE2(_bitd_, usize)(ip, csize, _start_);\
*out++ = b; out = TEMPLATE2(_bitpacka_, usize)[b](ip, csize, out, _start_);\
unsigned b = TEMPLATE2(_bitd_, usize)(ip, csize, start);\
*op++ = b; op = TEMPLATE2(_bitpacka_, usize)[b](ip, csize, op, start);\
ip += csize;\
start = ip[-1];\
} return out;\
} \
return op - out;\
}
unsigned char *bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip; BITNPACK(in, n, out, 128, 8); }
unsigned char *bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip; BITNPACK(in, n, out, 128, 16); }
unsigned char *bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; BITNPACK(in, n, out, 128, 32); }
unsigned char *bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip; BITNPACK(in, n, out, 128, 64); }
size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNPACK(in, n, out, 128, 8); }
size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNPACK(in, n, out, 128, 16); }
size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNPACK(in, n, out, 128, 32); }
size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNPACK(in, n, out, 128, 64); }
unsigned char *bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start) { uint8_t *ip; BITNDPACK(in, n, out, 128, 8, start, bitd, bitdpacka); }
unsigned char *bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start) { uint16_t *ip; BITNDPACK(in, n, out, 128, 16, start, bitd, bitdpacka); }
unsigned char *bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start) { uint32_t *ip; BITNDPACK(in, n, out, 128, 32, start, bitd, bitdpacka); }
unsigned char *bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start) { uint64_t *ip; BITNDPACK(in, n, out, 128, 64, start, bitd, bitdpacka); }
size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitd, bitdpacka); }
size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitd, bitdpacka); }
size_t bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitd, bitdpacka); }
size_t bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitd, bitdpacka); }
unsigned char *bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start) { uint8_t *ip; BITNDPACK(in, n, out, 128, 8, start, bitd1, bitd1packa); }
unsigned char *bitnd1pack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start) { uint16_t *ip; BITNDPACK(in, n, out, 128, 16, start, bitd1, bitd1packa); }
unsigned char *bitnd1pack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start) { uint32_t *ip; BITNDPACK(in, n, out, 128, 32, start, bitd1, bitd1packa); }
unsigned char *bitnd1pack64(uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start) { uint64_t *ip; BITNDPACK(in, n, out, 128, 64, start, bitd1, bitd1packa); }
size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip,start; BITNDPACK(in, n, out, 128, 8, bitd1, bitd1packa); }
size_t bitnd1pack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip,start; BITNDPACK(in, n, out, 128, 16, bitd1, bitd1packa); }
size_t bitnd1pack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip,start; BITNDPACK(in, n, out, 128, 32, bitd1, bitd1packa); }
size_t bitnd1pack64(uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip,start; BITNDPACK(in, n, out, 128, 64, bitd1, bitd1packa); }
//----------------------------------------------------------------------------------------------------------------------------------
#ifdef __SSE2__

View File

@ -30,7 +30,38 @@ extern "C" {
#endif
#include <stdint.h>
//********************************** Bit Packing : Pack ****************************************************************
//******************** Bit Packing High Level API - n unlimited ***************************************************
// All functions in this group return a byte count (size_t), not a pointer to the end of the buffer.
//
// bitnpackNN: pack the n NN-bit values of in[] to the buffer out. The input is processed in blocks of 128 values;
// each block is written as one header byte b (computed by BITSIZE) followed by the packed block.
// Return value = number of bytes written to out
size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
// bitndpackNN: same block layout, using the bitd/bitdpacka kernels (presumably delta coding, minimum delta 0 - confirm against bitutil.h).
// The first value in[0] is stored with vbxputNN and the remaining n-1 values are packed in blocks of 128.
// Return value = number of bytes written to out (0 if n is 0)
size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
// bitnd1packNN: as bitndpackNN but using the bitd1/bitd1packa kernels (presumably minimum delta 1 - confirm).
size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitnd1pack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitnd1pack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t bitnd1pack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
// bitnunpackNN: unpack n values from the compressed buffer in to out[] (inverse of bitnpackNN).
// Return value = number of bytes read from in
size_t bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
size_t bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
size_t bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
// bitndunpackNN: inverse of bitndpackNN. The first value is read with vbxgetNN and written to out[0],
// then the remaining n-1 values are unpacked in blocks of 128.
// Return value = number of bytes read from in (0 if n is 0)
size_t bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
size_t bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
size_t bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
// bitnd1unpackNN: inverse of bitnd1packNN (bitd1unpacka kernels). Same return convention as bitndunpackNN.
size_t bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
size_t bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
size_t bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
//******** Bit Packing Low level API ****************************************************************
// bitpackNN: Pack array with n unsigned (NN bits in[n]) values to the buffer out using nbits per value. Return value = end of compressed buffer out
unsigned char *bitpack8( uint8_t *__restrict in, unsigned n, const unsigned char *__restrict out , unsigned b);
@ -177,38 +208,6 @@ unsigned char *_bitd1unpack128h32(const unsigned char *__restrict in, unsigned n
unsigned char *_bitunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b, unsigned *__restrict pex, unsigned char *bb);
unsigned char *_bitdunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb);
unsigned char *_bitd1unpack256v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb);
//------------------------------- Multiple blocks --------------------------------
unsigned char *bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
unsigned char *bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
unsigned char *bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
unsigned char *bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
unsigned char *bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
unsigned char *bitnd1pack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
unsigned char *bitnd1pack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
unsigned char *bitnd1pack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
unsigned char *bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
unsigned char *bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
unsigned char *bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
unsigned char *bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
unsigned char *bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
unsigned char *bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
unsigned char *bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
unsigned char *bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
unsigned char *bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
unsigned char *bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
unsigned char *bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
unsigned char *bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
#ifdef __cplusplus
}
#endif

View File

@ -26,6 +26,7 @@
#include "conf.h"
#include "bitutil.h"
#include "bitpack.h"
#include "vint.h"
#define PAD8(_x_) (((_x_)+7)/8)
#pragma GCC push_options
@ -82,33 +83,39 @@ typedef unsigned char *(*BITUNPACK_D64)(const unsigned char *__restrict in, unsi
#undef OPI
#define BITNUNPACK(in, n, out, csize, usize) {\
unsigned char *ip = in;\
for(op = out,out+=n; op < out;) { unsigned oplen = out - op; if(oplen > csize) oplen = csize; __builtin_prefetch(in+512);\
unsigned b = *in++; in = TEMPLATE2(bitunpacka, usize)[b](in, csize, op);\
unsigned b = *ip++; ip = TEMPLATE2(bitunpacka, usize)[b](ip, csize, op);\
op += csize;\
} return in;\
} \
return ip - in;\
}
#define BITNDUNPACK(in, n, out, csize, usize, _start_, _bitunpacka_) {\
for(op = out,out+=n; op < out;) { unsigned oplen = out - op; if(oplen > csize) oplen = csize; __builtin_prefetch(in+512);\
unsigned b = *in++; in = TEMPLATE2(_bitunpacka_, usize)[b](in, csize, op, _start_);\
#define BITNDUNPACK(in, n, out, csize, usize, _bitunpacka_) {\
if(!n) return 0;\
unsigned char *ip = in;\
TEMPLATE2(vbxget, usize)(ip, start); \
*out++ = start;\
for(--n,op = out,out+=n; op < out;) { unsigned oplen = out - op; if(oplen > csize) oplen = csize; __builtin_prefetch(ip+512);\
unsigned b = *ip++; ip = TEMPLATE2(_bitunpacka_, usize)[b](ip, csize, op, start);\
op += csize;\
start = op[-1];\
} return in;\
} return ip - in;\
}
unsigned char *bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out) { uint8_t *op; BITNUNPACK(in, n, out, 128, 8); }
unsigned char *bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out) { uint16_t *op; BITNUNPACK(in, n, out, 128, 16); }
unsigned char *bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out) { uint32_t *op; BITNUNPACK(in, n, out, 128, 32); }
unsigned char *bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out) { uint64_t *op; BITNUNPACK(in, n, out, 128, 64); }
size_t bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out) { uint8_t *op; BITNUNPACK(in, n, out, 128, 8); }
size_t bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out) { uint16_t *op; BITNUNPACK(in, n, out, 128, 16); }
size_t bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out) { uint32_t *op; BITNUNPACK(in, n, out, 128, 32); }
size_t bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out) { uint64_t *op; BITNUNPACK(in, n, out, 128, 64); }
unsigned char *bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start) { uint8_t *op; BITNDUNPACK(in, n, out, 128, 8, start, bitdunpacka); }
unsigned char *bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start) { uint16_t *op; BITNDUNPACK(in, n, out, 128, 16, start, bitdunpacka); }
unsigned char *bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start) { uint32_t *op; BITNDUNPACK(in, n, out, 128, 32, start, bitdunpacka); }
unsigned char *bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start) { uint64_t *op; BITNDUNPACK(in, n, out, 128, 64, start, bitdunpacka); }
size_t bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out) { uint8_t *op,start; BITNDUNPACK(in, n, out, 128, 8, bitdunpacka); }
size_t bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out) { uint16_t *op,start; BITNDUNPACK(in, n, out, 128, 16, bitdunpacka); }
size_t bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out) { uint32_t *op,start; BITNDUNPACK(in, n, out, 128, 32, bitdunpacka); }
size_t bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out) { uint64_t *op,start; BITNDUNPACK(in, n, out, 128, 64, bitdunpacka); }
unsigned char *bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start) { uint8_t *op; BITNDUNPACK(in, n, out, 128, 8, start, bitd1unpacka); }
unsigned char *bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start) { uint16_t *op; BITNDUNPACK(in, n, out, 128, 16, start, bitd1unpacka); }
unsigned char *bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start) { uint32_t *op; BITNDUNPACK(in, n, out, 128, 32, start, bitd1unpacka); }
unsigned char *bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start) { uint64_t *op; BITNDUNPACK(in, n, out, 128, 64, start, bitd1unpacka); }
size_t bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out) { uint8_t *op,start; BITNDUNPACK(in, n, out, 128, 8, bitd1unpacka); }
size_t bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out) { uint16_t *op,start; BITNDUNPACK(in, n, out, 128, 16, bitd1unpacka); }
size_t bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out) { uint32_t *op,start; BITNDUNPACK(in, n, out, 128, 32, bitd1unpacka); }
size_t bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out) { uint64_t *op,start; BITNDUNPACK(in, n, out, 128, 64, bitd1unpacka); }
//--------------------------------------------------------------------------------------------------------------------------------------
#ifdef __SSE2__

View File

@ -67,6 +67,7 @@
typedef unsigned long long tm_t;
#define TM_T 1000000.0
#define TM_MAX (1ull<<63)
#if 1
#ifdef _WIN32
#include <windows.h>
static LARGE_INTEGER tps;
@ -76,6 +77,9 @@ static tm_t tminit() { QueryPerformanceFrequency(&tps); tm_t t0=tmtime(),ts; whi
static tm_t tmtime(void) { struct timespec tm; clock_gettime(CLOCK_MONOTONIC, &tm); return (tm_t)tm.tv_sec*1000000ull + tm.tv_nsec/1000; }
static tm_t tminit() { tm_t t0=tmtime(),ts; while((ts = tmtime())==t0); return ts; }
#endif
#else
#include "time_r.h"
#endif
//---------------------------------------- bench ----------------------------------------------------------------------
#define TM_MAX (1ull<<63)
@ -942,7 +946,7 @@ int becomp(unsigned char *_in, unsigned _inlen, unsigned char *_out, unsigned ou
op = codcomp(ip, iplen, op, oe-op, id, lev, prm, ifmt);
ip += iplen;
if(op > _out+outsize)
die("Overflow error %llu, %u in lib=%d\n", outsize, (int)(ptrdiff_t)(op - _out), id);
die("Compress overflow error %llu, %u in lib=%d\n", outsize, (int)(ptrdiff_t)(op - _out), id);
}
}
TMEND(_inlen); // printf("cnt=%d, csize=%d\n", cnt, csize);
@ -958,9 +962,9 @@ int bedecomp(unsigned char *_in, int _inlen, unsigned char *_out, unsigned _outl
for(ip = _in, out = _out; out < _out+_outlen;) {
unsigned outlen=_outlen,bs;
if(mode) {
vbget32(ip, outlen); //outlen = ctou32(ip); ip += 4;
vbget32(ip, outlen); //outlen = ctou32(ip); ip += 4;
ctou32(out) = outlen; out += 4;
outlen *= 4; if(out+outlen >_out+_outlen) die("FATAL: overflow error %d ", outlen);
outlen *= 4; if(out+outlen >_out+_outlen) die("FATAL: decompress overflow output error %d ", outlen);
}
for(op = out, out += outlen; op < out; ) {
unsigned oplen = out - op;

View File

@ -595,13 +595,13 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int
case TB_PFOR128: x = *in++; --n; VBPUT32(out, x);
if(inc) return n == 128?p4d1enc128v32(in, n, out, x):p4d1enc32( in, n, out, x);
else return n == 128?p4denc128v32( in, n, out, x):p4denc32( in, n, out, x);
case TB_PFORN128: x = *in++; --n; VBPUT32(out, x); return inc?p4nd1enc128v32( in, n, out, x):p4ndenc128v32(in, n, out, x);
case TB_PFORN128: return out+(inc?p4nd1enc128v32( in, n, out):p4ndenc128v32(in, n, out));
case TB_PACK128V: x = *in++; --n;
if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return n == 128?bitd1pack128v32(in, n, out, x, b):bitd1pack32(in, n, out, x, b); }
else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return n == 128?bitdpack128v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); }
#ifdef __AVX2__
case TB_PFOR256: x = *in++; bitdelta32( in, --n, pa, x, inc);VBPUT32(out, x); return n == 256?p4enc256v32(pa, n, out ):p4enc32(pa, n, out);
case TB_PFORN256: x = *in++; --n; VBPUT32(out, x); return inc?p4nd1enc256v32( in, n, out, x):p4ndenc256v32(in, n, out, x);
case TB_PFORN256: return out+(inc?p4nd1enc256v32( in, n, out):p4ndenc256v32(in, n, out));
/*case TB_PACK256V: x = *in++; --n;
if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return n == 256?bitd1pack256v32(in, n, out, x, b):bitd1pack32(in, n, out, x, b); }
else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return n == 256?bitdpack256v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); }*/
@ -618,7 +618,7 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int
case TB_PACK: x = *in++; --n;
if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return bitd1pack32(in, n, out, x, b); }
else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return bitdpack32( in, n, out, x, b); }
case TB_NPACK: x = *in++; --n; VBPUT32(out, x); return inc?bitnd1pack32(in, n, out, x):bitndpack32( in, n, out, x);
case TB_NPACK: return out+(inc?bitnd1pack32(in, n, out):bitndpack32( in, n, out));
#if C_SIMPLE8B
case AM_SIMPLE8B: b = bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); if(b>28) die("simple-8b overflow.bits size>28\n");
return vs8benc( pa, n, out);
@ -731,30 +731,26 @@ unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, i
case TB_FOR: VBGET32(in, x);*out = x; b = *in++; return inc?bitf1unpack32( in, n-1, out+1, x, b):bitfunpack32( in, n-1, out+1, x, b);
case TB_FORDA: VBGET32(in, x);*out = x; b = *in++; return inc?bitf1unpackx32( in, n-1, out+1, x, b):bitfunpackx32( in, n-1, out+1, x, b);
case TB_PACK: VBGET32(in, x);*out = x; b = *in++; return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b);
case TB_NPACK: VBGET32(in, x);*out = x; return inc?bitnd1unpack32( in, n-1, out+1, x):bitndunpack32( in, n-1, out+1, x);
case TB_NPACK: return in+(inc?bitnd1unpack32( in, n, out):bitndunpack32( in, n, out));
case TB_ELIASFANO:VBGET32(in, x);*out++ = x; --n;
if(inc) { return efano1dec32( in, n, out, x+1); }
else { return efanodec32( in, n, out, x); }
#if C_TURBOPFORV
case TB_ELIASFANOV:VBGET32(in, x); *out++ = x; --n;
if(inc) { return n==128?efano1dec128v32(in, n, out, x+1 ):efano1dec32(in, n, out, x+1); }
else { return n==128?efanodec128v32( in, n, out, x ):efanodec32( in, n, out, x); }
if(inc) { return n==128?efano1dec128v32(in, n, out, x+1 ):efano1dec32(in, n, out, x+1); }
else { return n==128?efanodec128v32( in, n, out, x ):efanodec32( in, n, out, x); }
case TB_PFOR128: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256);
if(inc) { return n==128?p4d1dec128v32( in, n, out, x ):p4d1dec32(in, n, out, x); }
else { return n==128?p4ddec128v32( in, n, out, x ):p4ddec32( in, n, out, x); }
case TB_PFORN128: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256);
if(inc) { return p4nd1dec128v32( in, n, out, x ); }
else { return p4nddec128v32( in, n, out, x ); }
if(inc) { return n==128?p4d1dec128v32( in, n, out, x ):p4d1dec32(in, n, out, x); }
else { return n==128?p4ddec128v32( in, n, out, x ):p4ddec32( in, n, out, x); }
case TB_PFORN128: return in+(inc?p4nd1dec128v32(in, n, out):p4nddec128v32( in, n, out));
case TB_PACK128V: VBGET32(in, x);*out = x; b = *in++;
if(n <= 128) { return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in,n-1, out+1, x, b); }
else { return inc?bitd1unpack128v32(in,n, out+1, x, b):bitdunpack128v32(in,n, out+1, x, b); }
if(n <= 128) { return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in,n-1, out+1, x, b); }
else { return inc?bitd1unpack128v32( in, n, out+1, x, b):bitdunpack128v32(in,n, out+1, x, b); }
#ifdef __AVX2__
case TB_PFOR256: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256);
if(inc) { return n==256?p4d1dec256v32( in, n, out, x ):p4d1dec32(in, n, out, x); }
else { return n==256?p4ddec256v32( in, n, out, x ):p4ddec32( in, n, out, x); }
case TB_PFORN256: VBGET32(in, x); *out++ = x; --n;
if(inc) { return p4nd1dec256v32( in, n, out, x ); }
else { return p4nddec256v32( in, n, out, x ); }
case TB_PFORN256: return in+(inc?p4nd1dec256v32(in, n, out ):p4nddec256v32( in, n, out));
/*case TB_PACK256V: VBGET32(in, x);*out = x; b = *in++;
if(n <= 256) return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b);
else { in = bitunpack256v32( in, out+1, b);bitundn32(out, n, -inc, inc); } break;*/
@ -880,17 +876,17 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int
case TB_FORDA:
case TB_PACK128H:
case TB_PACK: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return bitpack32(in, n, out, b);
case TB_NPACK: return bitnpack32(in, n, out);
case TB_NPACK: return out+bitnpack32(in, n, out);
case TB_PFORDA: return p4encx32( in, n, out);
#if C_TURBOPFORV
case TB_ELIASFANOV:return out;
case TB_PFOR128: return n == 128?p4enc128v32(in, n, out):p4enc32(in, n, out);
case TB_PFORN128: return p4nenc128v32(in, n, out);
case TB_PFORN128: return out+p4nenc128v32(in, n, out);
case TB_PACK128V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return n != 128?bitpack32(in, n, out, b):bitpack128v32(in, n, out, b);
#ifdef __AVX2__
case TB_PFOR256: return n == 256?p4enc256v32(in, n, out):p4enc32(in, n, out);
case TB_PFORN256: return p4nenc256v32(in, n, out);
case TB_PFORN256: return out+p4nenc256v32(in, n, out);
case TB_PACK256V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return n != 256?bitpack32(in, n, out, b):bitpack256v32(in, n, out, b);
#endif
#endif
@ -1060,13 +1056,13 @@ unsigned char *coddecomp(unsigned char *in, unsigned _n, unsigned char *_out, in
case TB_FOR: if(b < 0) b = *in++; return bitfunpack32( in, n, out, 0, b);
case TB_FORDA: if(b < 0) b = *in++; return _bitunpackx32( in, n, out, b);
case TB_PACK: if(b < 0) b = *in++; return bitunpack32( in, n, out, b);
case TB_NPACK: return bitnunpack32( in, n, out);
case TB_NPACK: return in+bitnunpack32( in, n, out);
#if C_TURBOPFORV
case TB_PFOR128 : __builtin_prefetch(in+256);return n == 128?p4dec128v32(in, n, out):p4dec32(in, n, out);
case TB_PFORN128 : return p4ndec128v32(in, n, out);
case TB_PFORN128 : return in+p4ndec128v32(in, n, out);
#ifdef __AVX2__
case TB_PFOR256 : __builtin_prefetch(in+256);return n == 256?p4dec256v32(in, n, out):p4dec32(in, n, out);
case TB_PFORN256 : return p4ndec256v32(in, n, out);
case TB_PFORN256 : return in+p4ndec256v32(in, n, out);
case TB_PACK256V: if(b < 0) b = *in++; return n != 256?bitunpack32(in, n, out, b):bitunpack256v32(in, n, out, b);
#endif
case TB_ELIASFANOV: return in;

4
vint.h
View File

@ -83,10 +83,12 @@ extern unsigned char _vtab32_[];
#define vbxput64(_op_, _x_) { unsigned long long _x = _x_; _vbxput64(_op_, _x, ;); }
#define vbxput32(_op_, _x_) { register unsigned _x = _x_; _vbxput32(_op_, _x, ;); }
#define vbxput16(_op_, _x_) vbxput32(_op_, _x_)
// vbxput8: store _x_ as one raw byte at *_op_ and advance _op_ by one. _op_ must be a modifiable byte-pointer lvalue.
// Both arguments are parenthesized so that an expression such as *pp advances the pointer it designates (not pp itself).
#define vbxput8( _op_, _x_) (*(_op_)++ = (_x_))
#define vbxget64(_ip_, _x_) _vbxget64(_ip_, _x_, ;)
#define vbxget32(_ip_, _x_) _vbxget32(_ip_, _x_, ;)
#define vbxget16(_ip_, _x_) vbxget32(_ip_,_x_)
// vbxget8: read one raw byte from *_ip_ into _x_ and advance _ip_ by one. _ip_ must be a modifiable byte-pointer lvalue.
// Both arguments are parenthesized so that an expression such as *pp advances the pointer it designates (not pp itself).
#define vbxget8(_ip_, _x_) ((_x_) = *(_ip_)++)
//---------------------------------------------------------------------------
#define VB_SIZE 64
#define VB_MAX 254
@ -159,10 +161,12 @@ static inline unsigned vbvlen64(unsigned x) { return _vbvlen64(x); }
#define vbput64(_op_, _x_) { unsigned long long _x = _x_; _vbput64(_op_, _x, ;); }
#define vbput32(_op_, _x_) { register unsigned _x = _x_; _vbput32(_op_, _x, ;); }
#define vbput16(_op_, _x_) vbput32(_op_, _x_)
// vbput8: store _x_ as one raw byte at *_op_ and advance _op_ by one. _op_ must be a modifiable byte-pointer lvalue.
// Both arguments are parenthesized so that an expression such as *pp advances the pointer it designates (not pp itself).
#define vbput8(_op_, _x_) (*(_op_)++ = (_x_))
#define vbget64(_ip_, _x_) _vbget64(_ip_, _x_, ;)
#define vbget32(_ip_, _x_) _vbget32(_ip_, _x_, ;)
#define vbget16(_ip_, _x_) vbget32(_ip_,_x_)
// vbget8: read one raw byte from *_ip_ into _x_ and advance _ip_ by one. _ip_ must be a modifiable byte-pointer lvalue.
// Both arguments are parenthesized so that an expression such as *pp advances the pointer it designates (not pp itself).
#define vbget8(_ip_, _x_) ((_x_) = *(_ip_)++)
//----------------------------- Variable byte: array functions -----------------------------------------------------------------------
// Encoding/Decoding: Return value = end of compressed output/input buffer out/in

97
vp4.h
View File

@ -29,10 +29,59 @@
extern "C" {
#endif
#include <stdint.h>
//************************************************ High level API - n unlimited ****************************************************
// Compress integer array with n values to the buffer out.
// Return value = number of bytes written to compressed buffer out
size_t p4nenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); // SIMD (Vertical bitpacking)
size_t p4nenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4ndenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4ndenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4ndenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4ndenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4ndenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4ndenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nd1enc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nd1enc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nd1enc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nd1enc128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nd1enc256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
size_t p4nd1enc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
// Decompress the compressed n values in input buffer in to the integer array out.
// Return value = number of bytes read from the compressed buffer in
size_t p4ndec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
size_t p4ndec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
size_t p4ndec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4ndec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4ndec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4ndec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
// Delta minimum = 0
size_t p4nddec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
size_t p4nddec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
size_t p4nddec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4nddec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4nddec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4nddec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
// Delta minimum = 1
size_t p4nd1dec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
size_t p4nd1dec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
size_t p4nd1dec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4nd1dec128v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4nd1dec256v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
size_t p4nd1dec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
//************** Low level API - n limited to 128/256 ***************************************
#define P4D_MAX 256
//********************************************** TurboPFor: Encode *****************************************************************************
// -------------- TurboPFor: Encode
//#include <assert.h>
// Low level API: Single block n limited
//compress integer array with n values to the buffer out. Return value = end of compressed buffer out
@ -82,29 +131,6 @@ ALWAYS_INLINE unsigned _p4bits16( uint16_t *__restrict in, unsigned n,
ALWAYS_INLINE unsigned _p4bits32( uint32_t *__restrict in, unsigned n, unsigned *pbx);
ALWAYS_INLINE unsigned _p4bits64( uint64_t *__restrict in, unsigned n, unsigned *pbx);
//----------------------- n unlimited ------------------
// compress integer array with n values to the buffer out. Return value = end of compressed buffer out
unsigned char *p4nenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *p4nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *p4nenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *p4nenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); // SIMD (Vertical bitpacking)
unsigned char *p4nenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *p4nenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
unsigned char *p4ndenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
unsigned char *p4ndenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
unsigned char *p4ndenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
unsigned char *p4ndenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start); // SIMD (Vertical bitpacking)
unsigned char *p4ndenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
unsigned char *p4ndenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
unsigned char *p4nd1enc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
unsigned char *p4nd1enc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
unsigned char *p4nd1enc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
unsigned char *p4nd1enc128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start); // SIMD (Vertical bitpacking)
unsigned char *p4nd1enc256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
unsigned char *p4nd1enc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
// Block header helpers. Bit 0 of the first header byte flags whether a second
// byte (bx) follows; b sits in the bits above it.
// All arguments and the whole expansion are parenthesized so that compound
// arguments (e.g. P4EB(x|y)) and surrounding operators (e.g. P4EBX(b,bx) & 0xff)
// cannot re-associate with the macro's own operators.
// P4EB(b): one-byte header value, flag bit clear.
#define P4EB(_b_) ((_b_) << 1)
// P4EBX(b, bx): two-byte header value, flag bit set, bx in bits 8 and up.
#define P4EBX(_b_, _bx_) ((_bx_) << 8 | (_b_) << 1 | 1)
// P4SAVE(out, b, bx): write the header at out and advance out by 1 byte when
// bx == 0, or by 2 bytes otherwise. The two-byte form is a single uint16_t
// store through out, so its byte order is the host's.
#define P4SAVE(_out_, _b_, _bx_) do { if(!(_bx_)) *(_out_)++ = P4EB(_b_); else *(uint16_t *)(_out_) = P4EBX(_b_, _bx_), (_out_) += 2; } while(0)
@ -162,29 +188,6 @@ unsigned char *p4d1dec128v32( unsigned char *__restrict in, unsigned n, uint32_t
unsigned char *p4d1dec256v32( unsigned char *__restrict in, unsigned n, uint32_t *__restrict out, uint32_t start);
unsigned char *p4d1dec64( unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
//************************************************ n unlimited ******************************************************************************************
unsigned char *p4ndec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
unsigned char *p4ndec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
unsigned char *p4ndec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
unsigned char *p4ndec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); // SIMD (Vertical BitPacking)
unsigned char *p4ndec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
unsigned char *p4ndec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
// Delta minimum = 0
unsigned char *p4nddec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
unsigned char *p4nddec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
unsigned char *p4nddec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
unsigned char *p4nddec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start); // SIMD (Vertical BitPacking)
unsigned char *p4nddec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
unsigned char *p4nddec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
// Delta minimum = 1
unsigned char *p4nd1dec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
unsigned char *p4nd1dec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
unsigned char *p4nd1dec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
unsigned char *p4nd1dec128v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start); // SIMD (Vertical BitPacking)
unsigned char *p4nd1dec256v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
unsigned char *p4nd1dec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
//---------------- Direct Access functions to compressed TurboPFor array p4encx16/p4encx32 -------------------------------------------------------
#ifndef NTURBOPFOR_DAC
#define P4D_PAD8(_x_) ( (((_x_)+8-1)/8) )

30
vp4c.c
View File

@ -290,18 +290,20 @@ unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsign
return TEMPLATE2(_P4ENC, USIZE)(in, n, out, b, bx);
}
unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
// Compress n values from in into out (no delta coding) and return the number
// of bytes written (final write position minus out).
// Input is consumed in blocks of CSIZE values; the n&(CSIZE-1) leftover values
// are handed to p4enc at the end.
// NOTE(review): statements that advance `out` directly appear to be the
// pre-change variants of the neighbouring `op` statements - confirm which set
// is meant to be live.
size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
// Empty input: zero bytes written.
if(!n) return 0;
unsigned char *op = out;
uint_t *ip;
// n&~(CSIZE-1) rounds n down to a whole number of blocks (assumes CSIZE is a power of two).
for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
// _p4bits returns b and fills in bx for this block.
unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(ip, CSIZE, &bx);
#if EXCEP > 0
// bx above USIZE is written as a single header byte with bit 7 set instead of the P4SAVE form.
if(bx <= USIZE) { P4SAVE(out, b, bx); } else *out++= 0x80|b<<1;
if(bx <= USIZE) { P4SAVE(op, b, bx); } else *op++= 0x80|b<<1;
#else
P4SAVE(out, b, bx);
P4SAVE(op, b, bx);
#endif
// Block payload follows its header; _P4ENC returns the advanced write position.
out = TEMPLATE2(_P4ENC, USIZE)(ip, CSIZE, out, b, bx); // out = TEMPLATE2(P4ENC, USIZE)(ip, CSIZE, out);
op = TEMPLATE2(_P4ENC, USIZE)(ip, CSIZE, op, b, bx); // op = TEMPLATE2(P4ENC, USIZE)(ip, CSIZE, op);
}
// Tail: the remaining n&(CSIZE-1) values go through p4enc.
return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), out);
return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), op) - out;
}
#else
ALWAYS_INLINE unsigned char *TEMPLATE2(P4DENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { if(!n) return out;
@ -310,21 +312,25 @@ ALWAYS_INLINE unsigned char *TEMPLATE2(P4DENC, USIZE)(uint_t *__restrict in, uns
return TEMPLATE2(P4ENC, USIZE)(_in, n, out);
}
unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out, uint_t start) {
uint_t *ip;
for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
// Delta variant: compress n values from in into out and return the number of
// bytes written (final write position minus out).
// The first input value is taken as `start` and passed to vbxput together with
// the write position (presumably storing it at the head of the stream). The
// remaining n-1 values are processed in blocks of CSIZE: each block is run
// through bitdelta(..., start, P4DELTA) into a scratch buffer, encoded from
// there, and start is then set to the block's last input value. The leftover
// (n-1)&(CSIZE-1) values go through P4NENCS.
// NOTE(review): statements that advance `out` directly appear to be the
// pre-change variants of the neighbouring `op` statements - confirm which set
// is meant to be live.
size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
// Empty input: zero bytes written. The return value is a byte count, so the
// pointer `out` must not be returned here.
if(!n) return 0;
unsigned char *op = out;
uint_t *ip, start = *in++;
TEMPLATE2(vbxput, USIZE)(op, start);
// --n accounts for the value consumed as start; n&~(CSIZE-1) rounds the rest
// down to whole blocks (assumes CSIZE is a power of two).
for(ip = in, --n; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
// Scratch buffer for the delta-transformed block.
uint_t _in[P4D_MAX+8];
TEMPLATE2(bitdelta, USIZE)(ip, CSIZE, _in, start, P4DELTA);
// _p4bits returns b and fills in bx for the transformed block.
unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(_in, CSIZE, &bx);
#if EXCEP > 0
// bx above USIZE is written as a single header byte with bit 7 set instead of the P4SAVE form.
if(bx <= USIZE) { P4SAVE(out, b, bx); } else *out++= 0x80|b<<1;
if(bx <= USIZE) { P4SAVE(op, b, bx); } else *op++= 0x80|b<<1;
#else
P4SAVE(out, b, bx);
P4SAVE(op, b, bx);
#endif
out = TEMPLATE2(_P4ENC, USIZE)(_in, CSIZE, out, b, bx); // out = TEMPLATE2(P4ENC, USIZE)(_in, CSIZE, out);
op = TEMPLATE2(_P4ENC, USIZE)(_in, CSIZE, op, b, bx); // op = TEMPLATE2(P4ENC, USIZE)(_in, CSIZE, op);
// The next block is transformed relative to this block's last original value.
start = ip[CSIZE-1];
}
return TEMPLATE2(P4NENCS, USIZE)(ip, n&(CSIZE-1), out, start);
return TEMPLATE2(P4NENCS, USIZE)(ip, n&(CSIZE-1), op, start) - out;
}
#endif

51
vp4d.c
View File

@ -61,6 +61,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#define P4DELTA(a)
#define P4DELTA_(a)
#undef DELTA
#define _P4DEC _p4dec
#define P4DEC p4dec
@ -87,6 +88,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#define P4DELTA(a) ,a
#define P4DELTA_(a) a
#define DELTA
#define _P4DEC _p4ddec //delta0
#define P4DEC p4ddec
@ -139,6 +141,8 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#define VSIZE 128
#define P4DELTA(a)
#define P4DELTA_(a)
#undef DELTA
#define _P4DEC _p4dec128v
#define P4DEC p4dec128v
#define P4NDEC p4ndec128v
@ -150,6 +154,8 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#define P4DELTA(a) ,a
#define P4DELTA_(a) a
#define DELTA
#define _P4DEC _p4ddec128v
#define P4DEC p4ddec128v
#define P4NDEC p4nddec128v
@ -169,11 +175,13 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#include "vp4d.c"
#undef BITUNDD
#undef P4DELTA
#undef DELTA
#endif
#ifdef __AVX2__
#define P4DELTA(a)
#define P4DELTA_(a)
#undef DELTA
#define VSIZE 256
#define _P4DEC _p4dec256v
#define P4DEC p4dec256v
@ -186,6 +194,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
#define P4DELTA(a) ,a
#define P4DELTA_(a) a
#define DELTA
#define _P4DEC _p4ddec256v
#define P4DEC p4ddec256v
#define P4NDEC p4nddec256v
@ -303,40 +312,48 @@ unsigned char *TEMPLATE2(P4DEC, USIZE)(unsigned char *__restrict in, unsigned n,
#define CSIZE 128
#endif
unsigned char *TEMPLATE2(P4NDEC, USIZE)(unsigned char *__restrict in, size_t n, uint_t *__restrict out P4DELTA(uint_t start) ) {
uint_t *op;
for(op = out; op != out+(n&~(CSIZE-1)); op += CSIZE) { __builtin_prefetch(in+512);
unsigned b = *in++,bx,i;
// Decompress n values from in into out and return the number of bytes consumed
// from in (final read position minus in).
// Output is produced in blocks of CSIZE values, each preceded by a header byte;
// the n&(CSIZE-1) leftover values are handed to P4NDECS at the end.
// When DELTA is defined the first value is obtained through vbxget, stored as
// out[0], and used as the running `start` for the blocks that follow.
// NOTE(review): statements that read through `in` directly appear to be the
// pre-change variants of the neighbouring `ip` statements - confirm which set
// is meant to be live.
size_t TEMPLATE2(P4NDEC, USIZE)(unsigned char *__restrict in, size_t n, uint_t *__restrict out) {
// Empty request: nothing is read.
if(!n) return 0;
unsigned char *ip = in;
uint_t *op;
#ifdef DELTA
uint_t start;
TEMPLATE2(vbxget, USIZE)(ip, start);
// The start value is itself the first decoded element, so one fewer value remains.
*out++ = start;
--n;
#endif
// n&~(CSIZE-1) rounds n down to a whole number of blocks (assumes CSIZE is a power of two).
for(op = out; op != out+(n&~(CSIZE-1)); op += CSIZE) { __builtin_prefetch(ip+512);
unsigned b = *ip++,bx,i;
// Header bit 7 clear: regular block; bit 0 set means one more header byte (bx) follows.
// NOTE(review): bx is passed on without being set when bit 0 is clear - presumably
// _P4DEC ignores it in that case; confirm.
if(likely(!(b & 0x80))) {
if(b & 1)
bx = *in++;
in = TEMPLATE2(_P4DEC, USIZE)(in, CSIZE, op P4DELTA(start), b, bx );
bx = *ip++;
ip = TEMPLATE2(_P4DEC, USIZE)(ip, CSIZE, op P4DELTA(start), b, bx );
}
// Header bit 7 set is only handled for element widths above 8 bits; for USIZE <= 8
// such a header byte is consumed and nothing else is done for the block.
#if USIZE > 8
else {
uint_t ex[P4D_MAX+8];
// Strip the bit-7 marker and the low flag bit to recover b; the next byte is bx.
b = (b & 0x7f)>>1;
bx = *in++;
in = TEMPLATE2(BITUNPACK, USIZE)(in, CSIZE, op, b);
in = TEMPLATE2(vbdec, USIZE)(in, bx, ex);
bx = *ip++;
// Unpack the CSIZE block values, then decode bx extra values into ex.
ip = TEMPLATE2(BITUNPACK, USIZE)(ip, CSIZE, op, b);
ip = TEMPLATE2(vbdec, USIZE)(ip, bx, ex);
// The next bx input bytes are positions within the block; each ex[i] is shifted
// left by b and OR-ed into the value at that position. Unrolled four at a time.
for(i = 0; i != (bx & ~3); i += 4) {
op[in[i ]] |= ex[i ] << b;
op[in[i+1]] |= ex[i+1] << b;
op[in[i+2]] |= ex[i+2] << b;
op[in[i+3]] |= ex[i+3] << b;
op[ip[i ]] |= ex[i ] << b;
op[ip[i+1]] |= ex[i+1] << b;
op[ip[i+2]] |= ex[i+2] << b;
op[ip[i+3]] |= ex[i+3] << b;
}
// Remaining (bx & 3) positions.
for(;i < bx; i++)
op[in[i]] |= ex[i] << b;
in += bx;
op[ip[i]] |= ex[i] << b;
// Skip past the position bytes just used.
ip += bx;
#ifdef BITUNDD
// Applied to the patched block with the running start value when BITUNDD is configured.
TEMPLATE2(BITUNDD, USIZE)(op, CSIZE, start);
#endif
} // in = TEMPLATE2(P4DEC, USIZE)(in, CSIZE, op P4DELTA(start));
} // ip = TEMPLATE2(P4DEC, USIZE)(ip, CSIZE, op P4DELTA(start));
#endif
// P4DELTA_ expands to its argument only in the delta configurations: the last decoded
// value of this block becomes the start of the next one.
P4DELTA_(start = op[CSIZE-1]);
}
// Tail: the remaining n&(CSIZE-1) values go through P4NDECS.
return TEMPLATE2(P4NDECS, USIZE)(in, n&(CSIZE-1), op P4DELTA(start));
return TEMPLATE2(P4NDECS, USIZE)(ip, n&(CSIZE-1), op P4DELTA(start)) - in;
}
#ifdef P4DECX