.
This commit is contained in:
64
bitpack.c
64
bitpack.c
@ -25,6 +25,7 @@
|
||||
#include <stdio.h>
|
||||
#include "bitpack.h"
|
||||
#include "bitutil.h"
|
||||
#include "vint.h"
|
||||
// Number of whole bytes needed to hold _x_ bits (division by 8, rounded up).
#define PAD8(_x_) (((_x_) + 7) / 8)
|
||||
|
||||
#pragma clang diagnostic push
|
||||
@ -97,45 +98,50 @@ typedef unsigned char *(*BITPACK_D64)(uint64_t *__restrict out, unsigned n, cons
|
||||
#include "bitpack_.h"
|
||||
#undef IPI
|
||||
|
||||
#define BITNPACK(in, n, out, csize, usize) { ip=in;\
|
||||
/*if(usize <= 32)\
|
||||
for(; ip < in+(n&~(csize*4-1)); ) { __builtin_prefetch(ip+512); unsigned char *p=ip; unsigned u,b;\
|
||||
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u = b; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
|
||||
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u |= b<<6; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
|
||||
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u |= b<<12; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
|
||||
TEMPLATE2(BITSIZE,usize)(ip, csize, b); u |= b<<18; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); ip+=csize;\
|
||||
ctou32(p) = p[3]<<24 | u&0xffffff;\
|
||||
}*/\
|
||||
for(in+=n; ip < in;) { unsigned iplen = in - ip; if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
|
||||
unsigned b; TEMPLATE2(BITSIZE,usize)(ip, csize, b); *out++ = b; out = TEMPLATE2(bitpacka, usize)[b](ip, csize, out); \
|
||||
#define BITNPACK(in, n, out, csize, usize) { unsigned char *op = out;\
|
||||
for(ip = in, in += n; ip < in;) { \
|
||||
unsigned iplen = in - ip,b;\
|
||||
if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
|
||||
TEMPLATE2(BITSIZE,usize)(ip, csize, b);\
|
||||
*op++ = b; \
|
||||
op = TEMPLATE2(bitpacka, usize)[b](ip, csize, op); \
|
||||
ip += csize;\
|
||||
} return out;\
|
||||
} \
|
||||
return op - out;\
|
||||
}
|
||||
|
||||
#define BITNDPACK(in, n, out, csize, usize, _start_, _bitd_, _bitpacka_) {\
|
||||
for(ip = in,in+=n; ip < in;) { unsigned iplen = in - ip; if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
|
||||
#define BITNDPACK(in, n, out, csize, usize, _bitd_, _bitpacka_) {\
|
||||
if(!n) return 0;\
|
||||
unsigned char *op = out; \
|
||||
start = *in++; \
|
||||
TEMPLATE2(vbxput, usize)(op, start);\
|
||||
\
|
||||
for(ip = in,--n, in += n; ip < in;) { \
|
||||
unsigned iplen = in - ip;\
|
||||
if(iplen > csize) iplen = csize; __builtin_prefetch(ip+512);\
|
||||
typeof(in[0]) _in[csize+8];\
|
||||
unsigned b = TEMPLATE2(_bitd_, usize)(ip, csize, _start_);\
|
||||
*out++ = b; out = TEMPLATE2(_bitpacka_, usize)[b](ip, csize, out, _start_);\
|
||||
unsigned b = TEMPLATE2(_bitd_, usize)(ip, csize, start);\
|
||||
*op++ = b; op = TEMPLATE2(_bitpacka_, usize)[b](ip, csize, op, start);\
|
||||
ip += csize;\
|
||||
start = ip[-1];\
|
||||
} return out;\
|
||||
} \
|
||||
return op - out;\
|
||||
}
|
||||
|
||||
unsigned char *bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip; BITNPACK(in, n, out, 128, 8); }
|
||||
unsigned char *bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip; BITNPACK(in, n, out, 128, 16); }
|
||||
unsigned char *bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; BITNPACK(in, n, out, 128, 32); }
|
||||
unsigned char *bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip; BITNPACK(in, n, out, 128, 64); }
|
||||
// Bit-pack n 8-bit values from in to out in groups of 128 via BITNPACK: every group is
// stored as one byte holding its bit width b followed by the output of the width-b packer.
// Returns the number of bytes written to out.
// NOTE(review): BITNPACK passes length 128 to the packer for every group, including a
// shorter final one - confirm that callers provide padded input.
size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out) { uint8_t *ip; BITNPACK(in, n, out, 128, 8); }
|
||||
// Bit-pack n 16-bit values from in to out in groups of 128 via BITNPACK: every group is
// stored as one byte holding its bit width b followed by the output of the width-b packer.
// Returns the number of bytes written to out.
// NOTE(review): BITNPACK passes length 128 to the packer for every group, including a
// shorter final one - confirm that callers provide padded input.
size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out) { uint16_t *ip; BITNPACK(in, n, out, 128, 16); }
|
||||
// Bit-pack n 32-bit values from in to out in groups of 128 via BITNPACK: every group is
// stored as one byte holding its bit width b followed by the output of the width-b packer.
// Returns the number of bytes written to out.
// NOTE(review): BITNPACK passes length 128 to the packer for every group, including a
// shorter final one - confirm that callers provide padded input.
size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out) { uint32_t *ip; BITNPACK(in, n, out, 128, 32); }
|
||||
// Bit-pack n 64-bit values from in to out in groups of 128 via BITNPACK: every group is
// stored as one byte holding its bit width b followed by the output of the width-b packer.
// Returns the number of bytes written to out.
// NOTE(review): BITNPACK passes length 128 to the packer for every group, including a
// shorter final one - confirm that callers provide padded input.
size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out) { uint64_t *ip; BITNPACK(in, n, out, 128, 64); }
|
||||
|
||||
unsigned char *bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start) { uint8_t *ip; BITNDPACK(in, n, out, 128, 8, start, bitd, bitdpacka); }
|
||||
unsigned char *bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start) { uint16_t *ip; BITNDPACK(in, n, out, 128, 16, start, bitd, bitdpacka); }
|
||||
unsigned char *bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start) { uint32_t *ip; BITNDPACK(in, n, out, 128, 32, start, bitd, bitdpacka); }
|
||||
unsigned char *bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start) { uint64_t *ip; BITNDPACK(in, n, out, 128, 64, start, bitd, bitdpacka); }
|
||||
// Delta bit-packing of n 8-bit values (bitd / bitdpacka helpers) through BITNDPACK.
// Returns 0 for n == 0; otherwise in[0] is stored with vbxput and the remaining values
// follow in groups of 128, each preceded by its bit-width byte.
// Returns the number of bytes written to out.
size_t bitndpack8(uint8_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint8_t *ip, start;   // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 8, bitd, bitdpacka);
}
|
||||
// Delta bit-packing of n 16-bit values (bitd / bitdpacka helpers) through BITNDPACK.
// Returns 0 for n == 0; otherwise in[0] is stored with vbxput and the remaining values
// follow in groups of 128, each preceded by its bit-width byte.
// Returns the number of bytes written to out.
size_t bitndpack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint16_t *ip, start;  // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 16, bitd, bitdpacka);
}
|
||||
// Delta bit-packing of n 32-bit values (bitd / bitdpacka helpers) through BITNDPACK.
// Returns 0 for n == 0; otherwise in[0] is stored with vbxput and the remaining values
// follow in groups of 128, each preceded by its bit-width byte.
// Returns the number of bytes written to out.
size_t bitndpack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint32_t *ip, start;  // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 32, bitd, bitdpacka);
}
|
||||
// Delta bit-packing of n 64-bit values (bitd / bitdpacka helpers) through BITNDPACK.
// Returns 0 for n == 0; otherwise in[0] is stored with vbxput and the remaining values
// follow in groups of 128, each preceded by its bit-width byte.
// Returns the number of bytes written to out.
size_t bitndpack64(uint64_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint64_t *ip, start;  // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 64, bitd, bitdpacka);
}
|
||||
|
||||
unsigned char *bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start) { uint8_t *ip; BITNDPACK(in, n, out, 128, 8, start, bitd1, bitd1packa); }
|
||||
unsigned char *bitnd1pack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start) { uint16_t *ip; BITNDPACK(in, n, out, 128, 16, start, bitd1, bitd1packa); }
|
||||
unsigned char *bitnd1pack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start) { uint32_t *ip; BITNDPACK(in, n, out, 128, 32, start, bitd1, bitd1packa); }
|
||||
unsigned char *bitnd1pack64(uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start) { uint64_t *ip; BITNDPACK(in, n, out, 128, 64, start, bitd1, bitd1packa); }
|
||||
// Same scheme as the bitd variant, but built on the bitd1 / bitd1packa helpers:
// packs n 8-bit values through BITNDPACK (0 returned for n == 0, first value stored with
// vbxput, then groups of 128 each preceded by a bit-width byte).
// Returns the number of bytes written to out.
size_t bitnd1pack8(uint8_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint8_t *ip, start;   // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 8, bitd1, bitd1packa);
}
|
||||
// Same scheme as the bitd variant, but built on the bitd1 / bitd1packa helpers:
// packs n 16-bit values through BITNDPACK (0 returned for n == 0, first value stored with
// vbxput, then groups of 128 each preceded by a bit-width byte).
// Returns the number of bytes written to out.
size_t bitnd1pack16(uint16_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint16_t *ip, start;  // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 16, bitd1, bitd1packa);
}
|
||||
// Same scheme as the bitd variant, but built on the bitd1 / bitd1packa helpers:
// packs n 32-bit values through BITNDPACK (0 returned for n == 0, first value stored with
// vbxput, then groups of 128 each preceded by a bit-width byte).
// Returns the number of bytes written to out.
size_t bitnd1pack32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint32_t *ip, start;  // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 32, bitd1, bitd1packa);
}
|
||||
// Same scheme as the bitd variant, but built on the bitd1 / bitd1packa helpers:
// packs n 64-bit values through BITNDPACK (0 returned for n == 0, first value stored with
// vbxput, then groups of 128 each preceded by a bit-width byte).
// Returns the number of bytes written to out.
size_t bitnd1pack64(uint64_t *__restrict in, size_t n, unsigned char *__restrict out)
{
  uint64_t *ip, start;  // both names are referenced inside the BITNDPACK expansion
  BITNDPACK(in, n, out, 128, 64, bitd1, bitd1packa);
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------------------------------------
|
||||
#ifdef __SSE2__
|
||||
|
65
bitpack.h
65
bitpack.h
@ -30,7 +30,38 @@ extern "C" {
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
|
||||
//********************************** Bit Packing : Pack ****************************************************************
|
||||
//******************** Bit Packing High Level API - n unlimited ***************************************************
|
||||
size_t bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
size_t bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
size_t bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitnd1pack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitnd1pack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t bitnd1pack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
size_t bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
size_t bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
size_t bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
size_t bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
size_t bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
size_t bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
size_t bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
size_t bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
size_t bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
//******** Bit Packing Low level API ****************************************************************
|
||||
|
||||
// bitpackNN: Pack array with n unsigned (NN bits in[n]) values to the buffer out using b bits per value. Return value = end of compressed buffer out
|
||||
unsigned char *bitpack8( uint8_t *__restrict in, unsigned n, const unsigned char *__restrict out , unsigned b);
|
||||
@ -177,38 +208,6 @@ unsigned char *_bitd1unpack128h32(const unsigned char *__restrict in, unsigned n
|
||||
unsigned char *_bitunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b, unsigned *__restrict pex, unsigned char *bb);
|
||||
unsigned char *_bitdunpack256v32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb);
|
||||
unsigned char *_bitd1unpack256v32(const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb);
|
||||
|
||||
//------------------------------- Multiple blocks --------------------------------
|
||||
unsigned char *bitnpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *bitnpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *bitnpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *bitnpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
unsigned char *bitndpack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
|
||||
unsigned char *bitndpack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
|
||||
unsigned char *bitndpack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
|
||||
unsigned char *bitndpack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
|
||||
|
||||
unsigned char *bitnd1pack8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
|
||||
unsigned char *bitnd1pack16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
|
||||
unsigned char *bitnd1pack32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
|
||||
unsigned char *bitnd1pack64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
|
||||
|
||||
unsigned char *bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
unsigned char *bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
unsigned char *bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
unsigned char *bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
unsigned char *bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
|
||||
unsigned char *bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
|
||||
unsigned char *bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
|
||||
|
||||
unsigned char *bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
|
||||
unsigned char *bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
|
||||
unsigned char *bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
43
bitunpack.c
43
bitunpack.c
@ -26,6 +26,7 @@
|
||||
#include "conf.h"
|
||||
#include "bitutil.h"
|
||||
#include "bitpack.h"
|
||||
#include "vint.h"
|
||||
// Number of whole bytes needed to hold _x_ bits (division by 8, rounded up).
#define PAD8(_x_) ( ((_x_) + 8 - 1) / 8 )
|
||||
|
||||
#pragma GCC push_options
|
||||
@ -82,33 +83,39 @@ typedef unsigned char *(*BITUNPACK_D64)(const unsigned char *__restrict in, unsi
|
||||
#undef OPI
|
||||
|
||||
#define BITNUNPACK(in, n, out, csize, usize) {\
|
||||
unsigned char *ip = in;\
|
||||
for(op = out,out+=n; op < out;) { unsigned oplen = out - op; if(oplen > csize) oplen = csize; __builtin_prefetch(in+512);\
|
||||
unsigned b = *in++; in = TEMPLATE2(bitunpacka, usize)[b](in, csize, op);\
|
||||
unsigned b = *ip++; ip = TEMPLATE2(bitunpacka, usize)[b](ip, csize, op);\
|
||||
op += csize;\
|
||||
} return in;\
|
||||
} \
|
||||
return ip - in;\
|
||||
}
|
||||
|
||||
#define BITNDUNPACK(in, n, out, csize, usize, _start_, _bitunpacka_) {\
|
||||
for(op = out,out+=n; op < out;) { unsigned oplen = out - op; if(oplen > csize) oplen = csize; __builtin_prefetch(in+512);\
|
||||
unsigned b = *in++; in = TEMPLATE2(_bitunpacka_, usize)[b](in, csize, op, _start_);\
|
||||
#define BITNDUNPACK(in, n, out, csize, usize, _bitunpacka_) {\
|
||||
if(!n) return 0;\
|
||||
unsigned char *ip = in;\
|
||||
TEMPLATE2(vbxget, usize)(ip, start); \
|
||||
*out++ = start;\
|
||||
for(--n,op = out,out+=n; op < out;) { unsigned oplen = out - op; if(oplen > csize) oplen = csize; __builtin_prefetch(ip+512);\
|
||||
unsigned b = *ip++; ip = TEMPLATE2(_bitunpacka_, usize)[b](ip, csize, op, start);\
|
||||
op += csize;\
|
||||
start = op[-1];\
|
||||
} return in;\
|
||||
} return ip - in;\
|
||||
}
|
||||
unsigned char *bitnunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out) { uint8_t *op; BITNUNPACK(in, n, out, 128, 8); }
|
||||
unsigned char *bitnunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out) { uint16_t *op; BITNUNPACK(in, n, out, 128, 16); }
|
||||
unsigned char *bitnunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out) { uint32_t *op; BITNUNPACK(in, n, out, 128, 32); }
|
||||
unsigned char *bitnunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out) { uint64_t *op; BITNUNPACK(in, n, out, 128, 64); }
|
||||
// Unpack n 8-bit values from in to out through BITNUNPACK: for each group of 128 values
// one bit-width byte b is read, then the width-b unpacker consumes the group.
// Returns the number of bytes read from in.
// NOTE(review): every group is unpacked with length 128, so out presumably needs room
// for a complete final group - confirm against callers.
size_t bitnunpack8(unsigned char *__restrict in, size_t n, uint8_t *__restrict out)
{
  uint8_t *op;          // output cursor used by the BITNUNPACK expansion
  BITNUNPACK(in, n, out, 128, 8);
}
|
||||
// Unpack n 16-bit values from in to out through BITNUNPACK: for each group of 128 values
// one bit-width byte b is read, then the width-b unpacker consumes the group.
// Returns the number of bytes read from in.
// NOTE(review): every group is unpacked with length 128, so out presumably needs room
// for a complete final group - confirm against callers.
size_t bitnunpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out)
{
  uint16_t *op;         // output cursor used by the BITNUNPACK expansion
  BITNUNPACK(in, n, out, 128, 16);
}
|
||||
// Unpack n 32-bit values from in to out through BITNUNPACK: for each group of 128 values
// one bit-width byte b is read, then the width-b unpacker consumes the group.
// Returns the number of bytes read from in.
// NOTE(review): every group is unpacked with length 128, so out presumably needs room
// for a complete final group - confirm against callers.
size_t bitnunpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out)
{
  uint32_t *op;         // output cursor used by the BITNUNPACK expansion
  BITNUNPACK(in, n, out, 128, 32);
}
|
||||
// Unpack n 64-bit values from in to out through BITNUNPACK: for each group of 128 values
// one bit-width byte b is read, then the width-b unpacker consumes the group.
// Returns the number of bytes read from in.
// NOTE(review): every group is unpacked with length 128, so out presumably needs room
// for a complete final group - confirm against callers.
size_t bitnunpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out)
{
  uint64_t *op;         // output cursor used by the BITNUNPACK expansion
  BITNUNPACK(in, n, out, 128, 64);
}
|
||||
|
||||
unsigned char *bitndunpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start) { uint8_t *op; BITNDUNPACK(in, n, out, 128, 8, start, bitdunpacka); }
|
||||
unsigned char *bitndunpack16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start) { uint16_t *op; BITNDUNPACK(in, n, out, 128, 16, start, bitdunpacka); }
|
||||
unsigned char *bitndunpack32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start) { uint32_t *op; BITNDUNPACK(in, n, out, 128, 32, start, bitdunpacka); }
|
||||
unsigned char *bitndunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start) { uint64_t *op; BITNDUNPACK(in, n, out, 128, 64, start, bitdunpacka); }
|
||||
// Delta unpacking of n 8-bit values (bitdunpacka helpers) through BITNDUNPACK.
// Returns 0 for n == 0; otherwise the first value is read with vbxget and stored in
// out[0], then each group of 128 is unpacked relative to the last value produced so far.
// Returns the number of bytes read from in.
size_t bitndunpack8(unsigned char *__restrict in, size_t n, uint8_t *__restrict out)
{
  uint8_t *op, start;   // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 8, bitdunpacka);
}
|
||||
// Delta unpacking of n 16-bit values (bitdunpacka helpers) through BITNDUNPACK.
// Returns 0 for n == 0; otherwise the first value is read with vbxget and stored in
// out[0], then each group of 128 is unpacked relative to the last value produced so far.
// Returns the number of bytes read from in.
size_t bitndunpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out)
{
  uint16_t *op, start;  // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 16, bitdunpacka);
}
|
||||
// Delta unpacking of n 32-bit values (bitdunpacka helpers) through BITNDUNPACK.
// Returns 0 for n == 0; otherwise the first value is read with vbxget and stored in
// out[0], then each group of 128 is unpacked relative to the last value produced so far.
// Returns the number of bytes read from in.
size_t bitndunpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out)
{
  uint32_t *op, start;  // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 32, bitdunpacka);
}
|
||||
// Delta unpacking of n 64-bit values (bitdunpacka helpers) through BITNDUNPACK.
// Returns 0 for n == 0; otherwise the first value is read with vbxget and stored in
// out[0], then each group of 128 is unpacked relative to the last value produced so far.
// Returns the number of bytes read from in.
size_t bitndunpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out)
{
  uint64_t *op, start;  // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 64, bitdunpacka);
}
|
||||
|
||||
unsigned char *bitnd1unpack8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start) { uint8_t *op; BITNDUNPACK(in, n, out, 128, 8, start, bitd1unpacka); }
|
||||
unsigned char *bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start) { uint16_t *op; BITNDUNPACK(in, n, out, 128, 16, start, bitd1unpacka); }
|
||||
unsigned char *bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start) { uint32_t *op; BITNDUNPACK(in, n, out, 128, 32, start, bitd1unpacka); }
|
||||
unsigned char *bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start) { uint64_t *op; BITNDUNPACK(in, n, out, 128, 64, start, bitd1unpacka); }
|
||||
// Same scheme as the bitdunpacka variant, but built on the bitd1unpacka helpers:
// unpacks n 8-bit values through BITNDUNPACK (0 returned for n == 0, first value read
// with vbxget into out[0], then groups of 128 chained on the last value produced).
// Returns the number of bytes read from in.
size_t bitnd1unpack8(unsigned char *__restrict in, size_t n, uint8_t *__restrict out)
{
  uint8_t *op, start;   // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 8, bitd1unpacka);
}
|
||||
// Same scheme as the bitdunpacka variant, but built on the bitd1unpacka helpers:
// unpacks n 16-bit values through BITNDUNPACK (0 returned for n == 0, first value read
// with vbxget into out[0], then groups of 128 chained on the last value produced).
// Returns the number of bytes read from in.
size_t bitnd1unpack16(unsigned char *__restrict in, size_t n, uint16_t *__restrict out)
{
  uint16_t *op, start;  // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 16, bitd1unpacka);
}
|
||||
// Same scheme as the bitdunpacka variant, but built on the bitd1unpacka helpers:
// unpacks n 32-bit values through BITNDUNPACK (0 returned for n == 0, first value read
// with vbxget into out[0], then groups of 128 chained on the last value produced).
// Returns the number of bytes read from in.
size_t bitnd1unpack32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out)
{
  uint32_t *op, start;  // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 32, bitd1unpacka);
}
|
||||
// Same scheme as the bitdunpacka variant, but built on the bitd1unpacka helpers:
// unpacks n 64-bit values through BITNDUNPACK (0 returned for n == 0, first value read
// with vbxget into out[0], then groups of 128 chained on the last value produced).
// Returns the number of bytes read from in.
size_t bitnd1unpack64(unsigned char *__restrict in, size_t n, uint64_t *__restrict out)
{
  uint64_t *op, start;  // both names are referenced inside the BITNDUNPACK expansion
  BITNDUNPACK(in, n, out, 128, 64, bitd1unpacka);
}
|
||||
|
||||
//--------------------------------------------------------------------------------------------------------------------------------------
|
||||
#ifdef __SSE2__
|
||||
|
@ -67,6 +67,7 @@
|
||||
typedef unsigned long long tm_t;
|
||||
#define TM_T 1000000.0
|
||||
#define TM_MAX (1ull<<63)
|
||||
#if 1
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
static LARGE_INTEGER tps;
|
||||
@ -76,6 +77,9 @@ static tm_t tminit() { QueryPerformanceFrequency(&tps); tm_t t0=tmtime(),ts; whi
|
||||
static tm_t tmtime(void) { struct timespec tm; clock_gettime(CLOCK_MONOTONIC, &tm); return (tm_t)tm.tv_sec*1000000ull + tm.tv_nsec/1000; }
|
||||
// Busy-wait until tmtime() returns a value different from the one read on entry,
// then return that new value.
static tm_t tminit() {
  tm_t first = tmtime(), now;
  do {
    now = tmtime();
  } while(now == first);
  return now;
}
|
||||
#endif
|
||||
#else
|
||||
#include "time_r.h"
|
||||
#endif
|
||||
//---------------------------------------- bench ----------------------------------------------------------------------
|
||||
#define TM_MAX (1ull<<63)
|
||||
|
||||
@ -942,7 +946,7 @@ int becomp(unsigned char *_in, unsigned _inlen, unsigned char *_out, unsigned ou
|
||||
op = codcomp(ip, iplen, op, oe-op, id, lev, prm, ifmt);
|
||||
ip += iplen;
|
||||
if(op > _out+outsize)
|
||||
die("Overflow error %llu, %u in lib=%d\n", outsize, (int)(ptrdiff_t)(op - _out), id);
|
||||
die("Compress overflow error %llu, %u in lib=%d\n", outsize, (int)(ptrdiff_t)(op - _out), id);
|
||||
}
|
||||
}
|
||||
TMEND(_inlen); // printf("cnt=%d, csize=%d\n", cnt, csize);
|
||||
@ -960,7 +964,7 @@ int bedecomp(unsigned char *_in, int _inlen, unsigned char *_out, unsigned _outl
|
||||
if(mode) {
|
||||
vbget32(ip, outlen); //outlen = ctou32(ip); ip += 4;
|
||||
ctou32(out) = outlen; out += 4;
|
||||
outlen *= 4; if(out+outlen >_out+_outlen) die("FATAL: overflow error %d ", outlen);
|
||||
outlen *= 4; if(out+outlen >_out+_outlen) die("FATAL: decompress overflow output error %d ", outlen);
|
||||
}
|
||||
for(op = out, out += outlen; op < out; ) {
|
||||
unsigned oplen = out - op;
|
||||
|
28
plugins.cc
28
plugins.cc
@ -595,13 +595,13 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int
|
||||
case TB_PFOR128: x = *in++; --n; VBPUT32(out, x);
|
||||
if(inc) return n == 128?p4d1enc128v32(in, n, out, x):p4d1enc32( in, n, out, x);
|
||||
else return n == 128?p4denc128v32( in, n, out, x):p4denc32( in, n, out, x);
|
||||
case TB_PFORN128: x = *in++; --n; VBPUT32(out, x); return inc?p4nd1enc128v32( in, n, out, x):p4ndenc128v32(in, n, out, x);
|
||||
case TB_PFORN128: return out+(inc?p4nd1enc128v32( in, n, out):p4ndenc128v32(in, n, out));
|
||||
case TB_PACK128V: x = *in++; --n;
|
||||
if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return n == 128?bitd1pack128v32(in, n, out, x, b):bitd1pack32(in, n, out, x, b); }
|
||||
else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return n == 128?bitdpack128v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); }
|
||||
#ifdef __AVX2__
|
||||
case TB_PFOR256: x = *in++; bitdelta32( in, --n, pa, x, inc);VBPUT32(out, x); return n == 256?p4enc256v32(pa, n, out ):p4enc32(pa, n, out);
|
||||
case TB_PFORN256: x = *in++; --n; VBPUT32(out, x); return inc?p4nd1enc256v32( in, n, out, x):p4ndenc256v32(in, n, out, x);
|
||||
case TB_PFORN256: return out+(inc?p4nd1enc256v32( in, n, out):p4ndenc256v32(in, n, out));
|
||||
/*case TB_PACK256V: x = *in++; --n;
|
||||
if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return n == 256?bitd1pack256v32(in, n, out, x, b):bitd1pack32(in, n, out, x, b); }
|
||||
else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return n == 256?bitdpack256v32( in, n, out, x, b):bitdpack32( in, n, out, x, b); }*/
|
||||
@ -618,7 +618,7 @@ unsigned char *codcomps(unsigned char *_in, unsigned _n, unsigned char *out, int
|
||||
case TB_PACK: x = *in++; --n;
|
||||
if(inc) { b = bitd132(in, n, x); VBPUT32(out, x); *out++=b; return bitd1pack32(in, n, out, x, b); }
|
||||
else { b = bitd32( in, n, x); VBPUT32(out, x); *out++=b; return bitdpack32( in, n, out, x, b); }
|
||||
case TB_NPACK: x = *in++; --n; VBPUT32(out, x); return inc?bitnd1pack32(in, n, out, x):bitndpack32( in, n, out, x);
|
||||
case TB_NPACK: return out+(inc?bitnd1pack32(in, n, out):bitndpack32( in, n, out));
|
||||
#if C_SIMPLE8B
|
||||
case AM_SIMPLE8B: b = bitdelta32( in+1, --n, pa, in[0], inc); VBPUT32(out, in[0]); if(b>28) die("simple-8b overflow.bits size>28\n");
|
||||
return vs8benc( pa, n, out);
|
||||
@ -731,7 +731,7 @@ unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, i
|
||||
case TB_FOR: VBGET32(in, x);*out = x; b = *in++; return inc?bitf1unpack32( in, n-1, out+1, x, b):bitfunpack32( in, n-1, out+1, x, b);
|
||||
case TB_FORDA: VBGET32(in, x);*out = x; b = *in++; return inc?bitf1unpackx32( in, n-1, out+1, x, b):bitfunpackx32( in, n-1, out+1, x, b);
|
||||
case TB_PACK: VBGET32(in, x);*out = x; b = *in++; return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b);
|
||||
case TB_NPACK: VBGET32(in, x);*out = x; return inc?bitnd1unpack32( in, n-1, out+1, x):bitndunpack32( in, n-1, out+1, x);
|
||||
case TB_NPACK: return in+(inc?bitnd1unpack32( in, n, out):bitndunpack32( in, n, out));
|
||||
case TB_ELIASFANO:VBGET32(in, x);*out++ = x; --n;
|
||||
if(inc) { return efano1dec32( in, n, out, x+1); }
|
||||
else { return efanodec32( in, n, out, x); }
|
||||
@ -742,9 +742,7 @@ unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, i
|
||||
case TB_PFOR128: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256);
|
||||
if(inc) { return n==128?p4d1dec128v32( in, n, out, x ):p4d1dec32(in, n, out, x); }
|
||||
else { return n==128?p4ddec128v32( in, n, out, x ):p4ddec32( in, n, out, x); }
|
||||
case TB_PFORN128: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256);
|
||||
if(inc) { return p4nd1dec128v32( in, n, out, x ); }
|
||||
else { return p4nddec128v32( in, n, out, x ); }
|
||||
case TB_PFORN128: return in+(inc?p4nd1dec128v32(in, n, out):p4nddec128v32( in, n, out));
|
||||
case TB_PACK128V: VBGET32(in, x);*out = x; b = *in++;
|
||||
if(n <= 128) { return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in,n-1, out+1, x, b); }
|
||||
else { return inc?bitd1unpack128v32( in, n, out+1, x, b):bitdunpack128v32(in,n, out+1, x, b); }
|
||||
@ -752,9 +750,7 @@ unsigned char *coddecomps(unsigned char *in, unsigned _n, unsigned char *_out, i
|
||||
case TB_PFOR256: VBGET32(in, x); *out++ = x; --n; //__builtin_prefetch(in+256);
|
||||
if(inc) { return n==256?p4d1dec256v32( in, n, out, x ):p4d1dec32(in, n, out, x); }
|
||||
else { return n==256?p4ddec256v32( in, n, out, x ):p4ddec32( in, n, out, x); }
|
||||
case TB_PFORN256: VBGET32(in, x); *out++ = x; --n;
|
||||
if(inc) { return p4nd1dec256v32( in, n, out, x ); }
|
||||
else { return p4nddec256v32( in, n, out, x ); }
|
||||
case TB_PFORN256: return in+(inc?p4nd1dec256v32(in, n, out ):p4nddec256v32( in, n, out));
|
||||
/*case TB_PACK256V: VBGET32(in, x);*out = x; b = *in++;
|
||||
if(n <= 256) return inc?bitd1unpack32( in, n-1, out+1, x, b):bitdunpack32( in, n-1, out+1, x, b);
|
||||
else { in = bitunpack256v32( in, out+1, b);bitundn32(out, n, -inc, inc); } break;*/
|
||||
@ -880,17 +876,17 @@ unsigned char *codcomp(unsigned char *_in, unsigned _n, unsigned char *out, int
|
||||
case TB_FORDA:
|
||||
case TB_PACK128H:
|
||||
case TB_PACK: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return bitpack32(in, n, out, b);
|
||||
case TB_NPACK: return bitnpack32(in, n, out);
|
||||
case TB_NPACK: return out+bitnpack32(in, n, out);
|
||||
case TB_PFORDA: return p4encx32( in, n, out);
|
||||
|
||||
#if C_TURBOPFORV
|
||||
case TB_ELIASFANOV:return out;
|
||||
case TB_PFOR128: return n == 128?p4enc128v32(in, n, out):p4enc32(in, n, out);
|
||||
case TB_PFORN128: return p4nenc128v32(in, n, out);
|
||||
case TB_PFORN128: return out+p4nenc128v32(in, n, out);
|
||||
case TB_PACK128V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return n != 128?bitpack32(in, n, out, b):bitpack128v32(in, n, out, b);
|
||||
#ifdef __AVX2__
|
||||
case TB_PFOR256: return n == 256?p4enc256v32(in, n, out):p4enc32(in, n, out);
|
||||
case TB_PFORN256: return p4nenc256v32(in, n, out);
|
||||
case TB_PFORN256: return out+p4nenc256v32(in, n, out);
|
||||
case TB_PACK256V: if(b < 0) { BITSIZE32(in, n, b); *out++ = b; } return n != 256?bitpack32(in, n, out, b):bitpack256v32(in, n, out, b);
|
||||
#endif
|
||||
#endif
|
||||
@ -1060,13 +1056,13 @@ unsigned char *coddecomp(unsigned char *in, unsigned _n, unsigned char *_out, in
|
||||
case TB_FOR: if(b < 0) b = *in++; return bitfunpack32( in, n, out, 0, b);
|
||||
case TB_FORDA: if(b < 0) b = *in++; return _bitunpackx32( in, n, out, b);
|
||||
case TB_PACK: if(b < 0) b = *in++; return bitunpack32( in, n, out, b);
|
||||
case TB_NPACK: return bitnunpack32( in, n, out);
|
||||
case TB_NPACK: return in+bitnunpack32( in, n, out);
|
||||
#if C_TURBOPFORV
|
||||
case TB_PFOR128 : __builtin_prefetch(in+256);return n == 128?p4dec128v32(in, n, out):p4dec32(in, n, out);
|
||||
case TB_PFORN128 : return p4ndec128v32(in, n, out);
|
||||
case TB_PFORN128 : return in+p4ndec128v32(in, n, out);
|
||||
#ifdef __AVX2__
|
||||
case TB_PFOR256 : __builtin_prefetch(in+256);return n == 256?p4dec256v32(in, n, out):p4dec32(in, n, out);
|
||||
case TB_PFORN256 : return p4ndec256v32(in, n, out);
|
||||
case TB_PFORN256 : return in+p4ndec256v32(in, n, out);
|
||||
case TB_PACK256V: if(b < 0) b = *in++; return n != 256?bitunpack32(in, n, out, b):bitunpack256v32(in, n, out, b);
|
||||
#endif
|
||||
case TB_ELIASFANOV: return in;
|
||||
|
4
vint.h
4
vint.h
@ -83,10 +83,12 @@ extern unsigned char _vtab32_[];
|
||||
#define vbxput64(_op_, _x_) { unsigned long long _x = _x_; _vbxput64(_op_, _x, ;); }
|
||||
#define vbxput32(_op_, _x_) { register unsigned _x = _x_; _vbxput32(_op_, _x, ;); }
|
||||
#define vbxput16(_op_, _x_) vbxput32(_op_, _x_)
|
||||
// 8-bit case: store the value as a single raw byte at *_op_ and advance _op_ by one.
// Arguments are parenthesized so that any expression can be passed safely.
#define vbxput8( _op_, _x_) (*(_op_)++ = (_x_))
|
||||
|
||||
#define vbxget64(_ip_, _x_) _vbxget64(_ip_, _x_, ;)
|
||||
#define vbxget32(_ip_, _x_) _vbxget32(_ip_, _x_, ;)
|
||||
#define vbxget16(_ip_, _x_) vbxget32(_ip_,_x_)
|
||||
// 8-bit case: load a single raw byte from *_ip_ into _x_ and advance _ip_ by one.
// Arguments are parenthesized so that any expression can be passed safely.
#define vbxget8(_ip_, _x_) ((_x_) = *(_ip_)++)
|
||||
//---------------------------------------------------------------------------
|
||||
#define VB_SIZE 64
|
||||
#define VB_MAX 254
|
||||
@ -159,10 +161,12 @@ static inline unsigned vbvlen64(unsigned x) { return _vbvlen64(x); }
|
||||
#define vbput64(_op_, _x_) { unsigned long long _x = _x_; _vbput64(_op_, _x, ;); }
|
||||
#define vbput32(_op_, _x_) { register unsigned _x = _x_; _vbput32(_op_, _x, ;); }
|
||||
#define vbput16(_op_, _x_) vbput32(_op_, _x_)
|
||||
// 8-bit case: store the value as a single raw byte at *_op_ and advance _op_ by one.
// Arguments are parenthesized so that any expression can be passed safely.
#define vbput8(_op_, _x_) (*(_op_)++ = (_x_))
|
||||
|
||||
#define vbget64(_ip_, _x_) _vbget64(_ip_, _x_, ;)
|
||||
#define vbget32(_ip_, _x_) _vbget32(_ip_, _x_, ;)
|
||||
#define vbget16(_ip_, _x_) vbget32(_ip_,_x_)
|
||||
// 8-bit case: load a single raw byte from *_ip_ into _x_ and advance _ip_ by one.
// Arguments are parenthesized so that any expression can be passed safely.
#define vbget8(_ip_, _x_) ((_x_) = *(_ip_)++)
|
||||
|
||||
//----------------------------- Variable byte: array functions -----------------------------------------------------------------------
|
||||
// Encoding/Decoding: Return value = end of compressed output/input buffer out/in
|
||||
|
97
vp4.h
97
vp4.h
@ -29,10 +29,59 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
//************************************************ High level API - n unlimited ****************************************************
|
||||
// Compress integer array with n values to the buffer out.
|
||||
// Return value = number of bytes written to compressed buffer out
|
||||
size_t p4nenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); // SIMD (Vertical bitpacking)
|
||||
size_t p4nenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
size_t p4ndenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4ndenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4ndenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4ndenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4ndenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4ndenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
size_t p4nd1enc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nd1enc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nd1enc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nd1enc128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nd1enc256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
size_t p4nd1enc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
// Decompress the compressed n values in input buffer in to the integer array out.
|
||||
// Return value = number of bytes read from the compressed buffer in
|
||||
size_t p4ndec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
size_t p4ndec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
size_t p4ndec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4ndec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4ndec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4ndec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
// Delta minimum = 0
|
||||
size_t p4nddec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
size_t p4nddec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
size_t p4nddec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4nddec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4nddec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4nddec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
// Delta minimum = 1
|
||||
size_t p4nd1dec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
size_t p4nd1dec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
size_t p4nd1dec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4nd1dec128v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4nd1dec256v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
size_t p4nd1dec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
|
||||
//************** Low level API - n limited to 128/256 ***************************************
|
||||
#define P4D_MAX 256
|
||||
|
||||
//********************************************** TurboPFor: Encode *****************************************************************************
|
||||
// -------------- TurboPFor: Encode
|
||||
//#include <assert.h>
|
||||
// Low level API: Single block n limited
|
||||
//compress integer array with n values to the buffer out. Return value = end of compressed buffer out
|
||||
@ -82,29 +131,6 @@ ALWAYS_INLINE unsigned _p4bits16( uint16_t *__restrict in, unsigned n,
|
||||
ALWAYS_INLINE unsigned _p4bits32( uint32_t *__restrict in, unsigned n, unsigned *pbx);
|
||||
ALWAYS_INLINE unsigned _p4bits64( uint64_t *__restrict in, unsigned n, unsigned *pbx);
|
||||
|
||||
//----------------------- n unlimited ------------------
|
||||
// compress integer array with n values to the buffer out. Return value = end of compressed buffer out
|
||||
unsigned char *p4nenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *p4nenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *p4nenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *p4nenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out); // SIMD (Vertical bitpacking)
|
||||
unsigned char *p4nenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
unsigned char *p4nenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out);
|
||||
|
||||
unsigned char *p4ndenc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
|
||||
unsigned char *p4ndenc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
|
||||
unsigned char *p4ndenc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
|
||||
unsigned char *p4ndenc128v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start); // SIMD (Vertical bitpacking)
|
||||
unsigned char *p4ndenc256v32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
|
||||
unsigned char *p4ndenc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
|
||||
|
||||
unsigned char *p4nd1enc8( uint8_t *__restrict in, size_t n, unsigned char *__restrict out, uint8_t start);
|
||||
unsigned char *p4nd1enc16( uint16_t *__restrict in, size_t n, unsigned char *__restrict out, uint16_t start);
|
||||
unsigned char *p4nd1enc32( uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
|
||||
unsigned char *p4nd1enc128v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start); // SIMD (Vertical bitpacking)
|
||||
unsigned char *p4nd1enc256v32(uint32_t *__restrict in, size_t n, unsigned char *__restrict out, uint32_t start);
|
||||
unsigned char *p4nd1enc64( uint64_t *__restrict in, size_t n, unsigned char *__restrict out, uint64_t start);
|
||||
|
||||
#define P4EB(_b_) (_b_ << 1)
|
||||
#define P4EBX(_b_, _bx_) (_bx_ << 8 | _b_ << 1 | 1)
|
||||
#define P4SAVE(_out_, _b_, _bx_) do { if(!_bx_) *_out_++ = P4EB(_b_);else *(uint16_t *)_out_ = P4EBX(_b_, _bx_), _out_ += 2; } while(0)
|
||||
@ -162,29 +188,6 @@ unsigned char *p4d1dec128v32( unsigned char *__restrict in, unsigned n, uint32_t
|
||||
unsigned char *p4d1dec256v32( unsigned char *__restrict in, unsigned n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *p4d1dec64( unsigned char *__restrict in, unsigned n, uint64_t *__restrict out, uint64_t start);
|
||||
|
||||
//************************************************ n unlimited ******************************************************************************************
|
||||
unsigned char *p4ndec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out);
|
||||
unsigned char *p4ndec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out);
|
||||
unsigned char *p4ndec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
unsigned char *p4ndec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out); // SIMD (Vertical BitPacking)
|
||||
unsigned char *p4ndec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out);
|
||||
unsigned char *p4ndec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out);
|
||||
|
||||
// Delta minimum = 0
|
||||
unsigned char *p4nddec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
|
||||
unsigned char *p4nddec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
|
||||
unsigned char *p4nddec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *p4nddec128v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start); // SIMD (Vertical BitPacking)
|
||||
unsigned char *p4nddec256v32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *p4nddec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
|
||||
// Delta minimum = 1
|
||||
unsigned char *p4nd1dec8( unsigned char *__restrict in, size_t n, uint8_t *__restrict out, uint8_t start);
|
||||
unsigned char *p4nd1dec16( unsigned char *__restrict in, size_t n, uint16_t *__restrict out, uint16_t start);
|
||||
unsigned char *p4nd1dec32( unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *p4nd1dec128v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start); // SIMD (Vertical BitPacking)
|
||||
unsigned char *p4nd1dec256v32(unsigned char *__restrict in, size_t n, uint32_t *__restrict out, uint32_t start);
|
||||
unsigned char *p4nd1dec64( unsigned char *__restrict in, size_t n, uint64_t *__restrict out, uint64_t start);
|
||||
|
||||
//---------------- Direct Access functions to compressed TurboPFor array p4encx16/p4encx32 -------------------------------------------------------
|
||||
#ifndef NTURBOPFOR_DAC
|
||||
#define P4D_PAD8(_x_) ( (((_x_)+8-1)/8) )
|
||||
|
30
vp4c.c
30
vp4c.c
@ -290,18 +290,20 @@ unsigned char *TEMPLATE2(P4ENC, USIZE)(uint_t *__restrict in, unsigned n, unsign
|
||||
return TEMPLATE2(_P4ENC, USIZE)(in, n, out, b, bx);
|
||||
}
|
||||
|
||||
unsigned char *TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
|
||||
size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
|
||||
if(!n) return 0;
|
||||
unsigned char *op = out;
|
||||
uint_t *ip;
|
||||
for(ip = in; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
|
||||
unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(ip, CSIZE, &bx);
|
||||
#if EXCEP > 0
|
||||
if(bx <= USIZE) { P4SAVE(out, b, bx); } else *out++= 0x80|b<<1;
|
||||
if(bx <= USIZE) { P4SAVE(op, b, bx); } else *op++= 0x80|b<<1;
|
||||
#else
|
||||
P4SAVE(out, b, bx);
|
||||
P4SAVE(op, b, bx);
|
||||
#endif
|
||||
out = TEMPLATE2(_P4ENC, USIZE)(ip, CSIZE, out, b, bx); // out = TEMPLATE2(P4ENC, USIZE)(ip, CSIZE, out);
|
||||
op = TEMPLATE2(_P4ENC, USIZE)(ip, CSIZE, op, b, bx); // op = TEMPLATE2(P4ENC, USIZE)(ip, CSIZE, op);
|
||||
}
|
||||
return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), out);
|
||||
return TEMPLATE2(p4enc, USIZE)(ip, n&(CSIZE-1), op) - out;
|
||||
}
|
||||
#else
|
||||
ALWAYS_INLINE unsigned char *TEMPLATE2(P4DENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { if(!n) return out;
|
||||
@ -310,21 +312,25 @@ ALWAYS_INLINE unsigned char *TEMPLATE2(P4DENC, USIZE)(uint_t *__restrict in, uns
|
||||
return TEMPLATE2(P4ENC, USIZE)(_in, n, out);
|
||||
}
|
||||
|
||||
// Delta variant of the "n unlimited" encoder.
// Compresses the n integers in `in` to the buffer `out` and returns the number
// of bytes written to `out` (0 when n == 0).
//
// Layout produced by this function, as visible below:
//   1. the first input value, emitted with the vbxput variable-byte writer;
//   2. for every full group of CSIZE remaining values: a header written by
//      P4SAVE (or the single byte 0x80|b<<1 when EXCEP > 0 and bx > USIZE),
//      followed by the _P4ENC payload of the group's deltas;
//   3. the remaining n&(CSIZE-1) values, handed to P4NENCS.
// NOTE(review): P4NENCS is presumed to return the end-of-output pointer, since
// its result is subtracted from `out` — confirm against its definition.
size_t TEMPLATE2(P4NENC, USIZE)(uint_t *__restrict in, size_t n, unsigned char *__restrict out) {
  // The function returns a byte count, so an empty input yields 0.
  // (Previously this returned `out`, i.e. a pointer converted to size_t.)
  if(!n) return 0;

  unsigned char *op = out;
  uint_t *ip, start = *in++;                      // first value seeds the delta chain

  TEMPLATE2(vbxput, USIZE)(op, start);            // store the seed value ahead of the blocks

  // --n: the seed value has already been consumed from the input.
  for(ip = in, --n; ip != in+(n&~(CSIZE-1)); ip += CSIZE) { __builtin_prefetch(ip+512);
    uint_t _in[P4D_MAX+8];                        // scratch buffer for the deltas of this group
    TEMPLATE2(bitdelta, USIZE)(ip, CSIZE, _in, start, P4DELTA);
    unsigned bx, b = TEMPLATE2(_p4bits, USIZE)(_in, CSIZE, &bx);
      #if EXCEP > 0
    if(bx <= USIZE) { P4SAVE(op, b, bx); } else *op++= 0x80|b<<1;
      #else
    P4SAVE(op, b, bx);
      #endif
    op = TEMPLATE2(_P4ENC, USIZE)(_in, CSIZE, op, b, bx);
    start = ip[CSIZE-1];                          // last value of this group seeds the next one
  }
  // Encode the tail (fewer than CSIZE values) and convert the end pointer to a length.
  return TEMPLATE2(P4NENCS, USIZE)(ip, n&(CSIZE-1), op, start) - out;
}
|
||||
#endif
|
||||
|
||||
|
49
vp4d.c
49
vp4d.c
@ -61,6 +61,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
|
||||
#define P4DELTA(a)
|
||||
#define P4DELTA_(a)
|
||||
#undef DELTA
|
||||
|
||||
#define _P4DEC _p4dec
|
||||
#define P4DEC p4dec
|
||||
@ -87,6 +88,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
|
||||
#define P4DELTA(a) ,a
|
||||
#define P4DELTA_(a) a
|
||||
#define DELTA
|
||||
|
||||
#define _P4DEC _p4ddec //delta0
|
||||
#define P4DEC p4ddec
|
||||
@ -139,6 +141,8 @@ static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
#define VSIZE 128
|
||||
#define P4DELTA(a)
|
||||
#define P4DELTA_(a)
|
||||
#undef DELTA
|
||||
|
||||
#define _P4DEC _p4dec128v
|
||||
#define P4DEC p4dec128v
|
||||
#define P4NDEC p4ndec128v
|
||||
@ -150,6 +154,8 @@ static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
|
||||
#define P4DELTA(a) ,a
|
||||
#define P4DELTA_(a) a
|
||||
#define DELTA
|
||||
|
||||
#define _P4DEC _p4ddec128v
|
||||
#define P4DEC p4ddec128v
|
||||
#define P4NDEC p4nddec128v
|
||||
@ -169,11 +175,13 @@ static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
#include "vp4d.c"
|
||||
#undef BITUNDD
|
||||
#undef P4DELTA
|
||||
#undef DELTA
|
||||
#endif
|
||||
|
||||
#ifdef __AVX2__
|
||||
#define P4DELTA(a)
|
||||
#define P4DELTA_(a)
|
||||
#undef DELTA
|
||||
#define VSIZE 256
|
||||
#define _P4DEC _p4dec256v
|
||||
#define P4DEC p4dec256v
|
||||
@ -186,6 +194,7 @@ static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
|
||||
#define P4DELTA(a) ,a
|
||||
#define P4DELTA_(a) a
|
||||
#define DELTA
|
||||
#define _P4DEC _p4ddec256v
|
||||
#define P4DEC p4ddec256v
|
||||
#define P4NDEC p4nddec256v
|
||||
@ -303,40 +312,48 @@ unsigned char *TEMPLATE2(P4DEC, USIZE)(unsigned char *__restrict in, unsigned n,
|
||||
#define CSIZE 128
|
||||
#endif
|
||||
|
||||
unsigned char *TEMPLATE2(P4NDEC, USIZE)(unsigned char *__restrict in, size_t n, uint_t *__restrict out P4DELTA(uint_t start) ) {
|
||||
size_t TEMPLATE2(P4NDEC, USIZE)(unsigned char *__restrict in, size_t n, uint_t *__restrict out) {
|
||||
if(!n) return 0;
|
||||
unsigned char *ip = in;
|
||||
uint_t *op;
|
||||
for(op = out; op != out+(n&~(CSIZE-1)); op += CSIZE) { __builtin_prefetch(in+512);
|
||||
unsigned b = *in++,bx,i;
|
||||
#ifdef DELTA
|
||||
uint_t start;
|
||||
TEMPLATE2(vbxget, USIZE)(ip, start);
|
||||
*out++ = start;
|
||||
--n;
|
||||
#endif
|
||||
for(op = out; op != out+(n&~(CSIZE-1)); op += CSIZE) { __builtin_prefetch(ip+512);
|
||||
unsigned b = *ip++,bx,i;
|
||||
|
||||
if(likely(!(b & 0x80))) {
|
||||
if(b & 1)
|
||||
bx = *in++;
|
||||
in = TEMPLATE2(_P4DEC, USIZE)(in, CSIZE, op P4DELTA(start), b, bx );
|
||||
bx = *ip++;
|
||||
ip = TEMPLATE2(_P4DEC, USIZE)(ip, CSIZE, op P4DELTA(start), b, bx );
|
||||
}
|
||||
#if USIZE > 8
|
||||
else {
|
||||
uint_t ex[P4D_MAX+8];
|
||||
b = (b & 0x7f)>>1;
|
||||
bx = *in++;
|
||||
in = TEMPLATE2(BITUNPACK, USIZE)(in, CSIZE, op, b);
|
||||
in = TEMPLATE2(vbdec, USIZE)(in, bx, ex);
|
||||
bx = *ip++;
|
||||
ip = TEMPLATE2(BITUNPACK, USIZE)(ip, CSIZE, op, b);
|
||||
ip = TEMPLATE2(vbdec, USIZE)(ip, bx, ex);
|
||||
for(i = 0; i != (bx & ~3); i += 4) {
|
||||
op[in[i ]] |= ex[i ] << b;
|
||||
op[in[i+1]] |= ex[i+1] << b;
|
||||
op[in[i+2]] |= ex[i+2] << b;
|
||||
op[in[i+3]] |= ex[i+3] << b;
|
||||
op[ip[i ]] |= ex[i ] << b;
|
||||
op[ip[i+1]] |= ex[i+1] << b;
|
||||
op[ip[i+2]] |= ex[i+2] << b;
|
||||
op[ip[i+3]] |= ex[i+3] << b;
|
||||
}
|
||||
for(;i < bx; i++)
|
||||
op[in[i]] |= ex[i] << b;
|
||||
in += bx;
|
||||
op[ip[i]] |= ex[i] << b;
|
||||
ip += bx;
|
||||
#ifdef BITUNDD
|
||||
TEMPLATE2(BITUNDD, USIZE)(op, CSIZE, start);
|
||||
#endif
|
||||
} // in = TEMPLATE2(P4DEC, USIZE)(in, CSIZE, op P4DELTA(start));
|
||||
} // ip = TEMPLATE2(P4DEC, USIZE)(ip, CSIZE, op P4DELTA(start));
|
||||
#endif
|
||||
P4DELTA_(start = op[CSIZE-1]);
|
||||
}
|
||||
return TEMPLATE2(P4NDECS, USIZE)(in, n&(CSIZE-1), op P4DELTA(start));
|
||||
return TEMPLATE2(P4NDECS, USIZE)(ip, n&(CSIZE-1), op P4DELTA(start)) - in;
|
||||
}
|
||||
|
||||
#ifdef P4DECX
|
||||
|
Reference in New Issue
Block a user