TurboPFor: Variable byte encode/decode

This commit is contained in:
x
2019-12-21 14:06:33 +01:00
parent 3c3972bc28
commit a2a41e201b

204
vint.c
View File

@ -1,7 +1,7 @@
/** /**
Copyright (C) powturbo 2013-2019 Copyright (C) powturbo 2013-2019
GPL v2 License GPL v2 License
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or the Free Software Foundation; either version 2 of the License, or
@ -21,13 +21,13 @@
- twitter : https://twitter.com/powturbo - twitter : https://twitter.com/powturbo
- email : powturbo [_AT_] gmail [_DOT_] com - email : powturbo [_AT_] gmail [_DOT_] com
**/ **/
// vint.c - "Integer Compression" variable byte // vint.c - "Integer Compression" variable byte
#ifndef USIZE #ifndef USIZE
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#pragma warning( disable : 4005) #pragma warning( disable : 4005)
#pragma warning( disable : 4090) #pragma warning( disable : 4090)
#pragma warning( disable : 4068) #pragma warning( disable : 4068)
#define BITUTIL_IN #define BITUTIL_IN
#define VINT_IN #define VINT_IN
@ -46,7 +46,7 @@
#define USIZE 32 #define USIZE 32
#include "vint.c" #include "vint.c"
#undef USIZE #undef USIZE
#define USIZE 64 #define USIZE 64
#include "vint.c" #include "vint.c"
#undef USIZE #undef USIZE
@ -68,7 +68,7 @@
#define USIZE 32 #define USIZE 32
#include "vint.c" #include "vint.c"
#undef USIZE #undef USIZE
#define USIZE 64 #define USIZE 64
#include "vint.c" #include "vint.c"
#undef USIZE #undef USIZE
@ -88,41 +88,41 @@
/*
 * Raw-copy fallback helpers. USIZE (bits per integer) must be defined at the
 * point of use.
 *
 * OVERFLOWD(in,n,out,vbmax)
 *   Decoder side. If the first input byte equals `vbmax`, copies the
 *   n*(USIZE/8) bytes that follow it into `out` and RETURNS from the
 *   enclosing function with the pointer just past the copied bytes.
 *
 * OVERFLOWE(in,n,out,op,vbmax)
 *   Encoder side. If the write pointer `op` has advanced beyond
 *   out + n*(USIZE/8), the output is replaced by the marker byte `vbmax`
 *   followed by a verbatim copy of the n input integers, and `op` is reset
 *   to the end of that copy.
 *
 * Every argument is parenthesized so that expression arguments (for example
 * `n` given as `a+b`) are evaluated as a unit. Arguments are still evaluated
 * more than once, so they must be free of side effects.
 */
#define OVERFLOWD(in,n,out,vbmax)    if(*(in) == (vbmax)) { memcpy((out), (in)+1, (n)*(USIZE/8)); return (in)+1+(n)*(USIZE/8); }
#define OVERFLOWE(in,n,out,op,vbmax) if((op) > (out) + (n)*(USIZE/8)) { *(out) = (vbmax); memcpy((out)+1, (in), (n)*(USIZE/8)); (op) = (out)+1+(n)*(USIZE/8); }
//#else //#else
//#define OVERFLOWD(in,n,out,vbmax) //#define OVERFLOWD(in,n,out,vbmax)
//#define OVERFLOWE(in,n,out,op,vbmax) //#define OVERFLOWE(in,n,out,op,vbmax)
//#endif //#endif
/*
 * vbdec<USIZE>: decode `n` variable-byte coded integers from `in` into `out`.
 *
 *   in   encoded input
 *   n    number of integers to produce
 *   out  destination array, must have room for n elements
 *
 * Returns the input pointer as it stands after the last value was read.
 * When the block starts with the raw marker (VB_MAX+1), OVERFLOWD copies the
 * values verbatim and returns directly.
 */
unsigned char *TEMPLATE2(vbdec, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out) {
  uint_t  val;
  uint_t *dst = out;
  uint_t *blk_end;

  OVERFLOWD(in, n, out, VB_MAX+1);

  /* Largest prefix of the output whose length is a multiple of UN. */
  blk_end = out + (n & ~(UN-1));

  #define VBE(_i_) TEMPLATE2(_vbget, USIZE)(in, val, dst[_i_] = val)
  while(dst != blk_end) {
    VBE(0); VBE(1); VBE(2); VBE(3);
      #if UN > 4
    VBE(4); VBE(5); VBE(6); VBE(7);
      #endif
    PREFETCH(in+16*USIZE, 0);
    dst += UN;
  }

  /* Remaining (n mod UN) values, one at a time. */
  while(dst != out+n)
    TEMPLATE2(_vbget, USIZE)(in, val, *dst++ = val );
  return in;
}
#undef VBE
unsigned char *TEMPLATE2(vbenc, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) { unsigned char *TEMPLATE2(vbenc, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out) {
uint_t x, *ip; uint_t x, *ip;
unsigned char *op = out; unsigned char *op = out;
#define VBD(_i_) x = ip[_i_]; TEMPLATE2(_vbput, USIZE)(op, x, ;); #define VBD(_i_) x = ip[_i_]; TEMPLATE2(_vbput, USIZE)(op, x, ;);
for(ip = in; ip != in+(n&~(UN-1)); ip += UN) { PREFETCH(ip+USIZE*8, 0); for(ip = in; ip != in+(n&~(UN-1)); ip += UN) { PREFETCH(ip+USIZE*8, 0);
VBD(0); VBD(1); VBD(2); VBD(3); VBD(0); VBD(1); VBD(2); VBD(3);
#if UN > 4 #if UN > 4
VBD(4); VBD(5); VBD(6); VBD(7); VBD(4); VBD(5); VBD(6); VBD(7);
#endif #endif
} }
while(ip != in+n) { while(ip != in+n) {
x = *ip++; x = *ip++;
TEMPLATE2(_vbput, USIZE)(op, x, ;); TEMPLATE2(_vbput, USIZE)(op, x, ;);
} }
OVERFLOWE(in,n,out,op,VB_MAX+1); OVERFLOWE(in,n,out,op,VB_MAX+1);
@ -130,30 +130,30 @@ unsigned char *TEMPLATE2(vbenc, USIZE)(uint_t *__restrict in, unsigned n, unsign
} }
#undef VBD #undef VBD
uint_t TEMPLATE2(vbgetx, USIZE)(unsigned char *__restrict in, unsigned idx) { uint_t TEMPLATE2(vbgetx, USIZE)(unsigned char *__restrict in, unsigned idx) {
unsigned char *ip; unsigned char *ip;
unsigned i; unsigned i;
uint_t x; uint_t x;
if(*in == 255) if(*in == 255)
return TEMPLATE2(ctou, USIZE)(in+1+idx*(USIZE/8)); return TEMPLATE2(ctou, USIZE)(in+1+idx*(USIZE/8));
for(ip = in,i = 0; i <= idx; i++) for(ip = in,i = 0; i <= idx; i++)
ip += TEMPLATE2(_vbvlen, USIZE)(*ip); ip += TEMPLATE2(_vbvlen, USIZE)(*ip);
TEMPLATE2(_vbget, USIZE)(in, x, ;); TEMPLATE2(_vbget, USIZE)(in, x, ;);
return x; return x;
} }
/*unsigned TEMPLATE2(vbgeteq, USIZE)(unsigned char *__restrict in, unsigned n, uint_t key, unsigned char **__restrict _ip) { /*unsigned TEMPLATE2(vbgeteq, USIZE)(unsigned char *__restrict in, unsigned n, uint_t key, unsigned char **__restrict _ip) {
unsigned i; unsigned i;
unsigned char *ip; unsigned char *ip;
uint_t x; uint_t x;
if(*in == 255) { if(*in == 255) {
for(ip = (*_ip==in)?in:*ip; ip < in+n; ip+USIZE/8) { for(ip = (*_ip==in)?in:*ip; ip < in+n; ip+USIZE/8) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
if((x = TEMPLATE2(ctou, USIZE)(ip)) == key) break; if((x = TEMPLATE2(ctou, USIZE)(ip)) == key) break;
} }
} else for(ip = *_ip,i=idx; i < n; i++) { } else for(ip = *_ip,i=idx; i < n; i++) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
if(x == key) break; if(x == key) break;
} }
*_ip = ip; *_ip = ip;
return i; return i;
@ -162,54 +162,54 @@ uint_t TEMPLATE2(vbgetx, USIZE)(unsigned char *__restrict in, unsigned idx) {
/*
 * Bitmap helpers. `p` is an array of elements holding BITS flags each (BITS
 * is defined elsewhere); bit `n` lives in element n/BITS and bits are
 * numbered from the most significant position (mask 0x80 >> n%BITS).
 * BIT_ISSET yields the masked value (non-zero when set), not 0/1.
 * Both arguments are parenthesized so pointer expressions such as `buf+1`
 * can be passed safely; `n` is evaluated twice.
 */
#define BIT_SET(  p, n) ((p)[(n)/BITS] |=  (0x80>>((n)%BITS)))
#define BIT_CLEAR(p, n) ((p)[(n)/BITS] &= ~(0x80>>((n)%BITS)))
#define BIT_ISSET(p, n) ((p)[(n)/BITS] &   (0x80>>((n)%BITS)))
/*
 * vbddenc<USIZE>: delta-of-delta encoder.
 *
 *   in     n input integers
 *   out    output buffer
 *   start  value preceding in[0]
 *
 * Output layout: (n+7)/8 flag bytes first, then the payload. For each input
 * the first difference (in[i] - previous value) is compared with the previous
 * first difference; the change between the two is what gets stored. A zero
 * change clears the element's flag bit and stores nothing; a non-zero change
 * sets the bit and is written zigzag-coded with _vbput.
 *
 * Returns the pointer just past the written payload.
 */
unsigned char *TEMPLATE2(vbddenc, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) {
  uint_t        *ip, *p, v, pd = 0;
  unsigned char *bp = out;                      /* flag bitmap sits at the front */

  out += (n+7)/8;                               /* payload follows the bitmap */

  #define VBDDE(i) { uint_t d = ip[i]-start, x = d-pd; pd = d;                                           \
    if(x) { BIT_SET(bp,i); x = TEMPLATE2(zigzagenc, USIZE)(x); TEMPLATE2(_vbput, USIZE)(out, x, ;); }   \
    else BIT_CLEAR(bp,i);                                                                                \
    start = ip[i]; }

  /* Whole groups of eight values share one flag byte. */
  ip = in;
  while(ip != in+(n&~(8-1))) {
    VBDDE(0); VBDDE(1); VBDDE(2); VBDDE(3);
    VBDDE(4); VBDDE(5); VBDDE(6); VBDDE(7);
    ip += 8;
    bp++;
  }

  /* Trailing values use the leading bits of the last flag byte. */
  for(p = ip; p != in+n; p++)
    VBDDE(p-ip);
  return out;
}
/*
 * vbdddec<USIZE>: delta-of-delta decoder, counterpart of vbddenc.
 *
 *   in     encoded block: (n+7)/8 flag bytes followed by the payload
 *   n      number of integers to produce
 *   out    destination array, must have room for n elements
 *   start  value preceding the first element
 *
 * For each element, a set flag bit means a zigzag-coded change is read from
 * the payload and added to the running first difference `pd`; a clear bit
 * leaves `pd` unchanged. The element is then start += pd.
 *
 * Returns the payload pointer after the last value read.
 */
unsigned char *TEMPLATE2(vbdddec, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) {
  uint_t        *op, *p, pd = 0;
  unsigned       i;
  unsigned char *bp = in;                       /* flag bitmap */

  in += (n+7)/8;                                /* payload follows the bitmap */

  #define VBDDD(_i_) { if(BIT_ISSET(bp,_i_)) { uint_t x = 0; TEMPLATE2(_vbget, USIZE)(in, x, ;); pd += TEMPLATE2(zigzagdec, USIZE)(x); } \
    start += pd; op[_i_] = start; }

  for(op = out; op != out+(n&~(8-1)); op += 8, bp++) {
    if(bp[0]) {
      VBDDD(0); VBDDD(1); VBDDD(2); VBDDD(3);
      VBDDD(4); VBDDD(5); VBDDD(6); VBDDD(7);
      PREFETCH(in+16*USIZE, 0);
    } else {
      /* All eight flags clear: the first difference stays constant. */
      for(i = 0; i < 8; i++) {
        start += pd;
        op[i] = start;
      }
    }
  }

  /* Trailing values use the leading bits of the last flag byte. */
  for(p = op; p != out+n; p++)
    VBDDD(p-op);
  return in;
}
#undef VBDDE
#undef VBDDD
/*
 * vbzenc<USIZE>: zigzag-delta encoder.
 *
 *   in     n input integers
 *   out    output buffer
 *   start  value preceding in[0]
 *
 * Each element is replaced by the signed difference to its predecessor,
 * zigzag-coded and written with _vbput. OVERFLOWE is not applied in this
 * variant. Returns the pointer just past the written bytes.
 *
 * The unrolled loop consumes four values per pass; it ends exactly on the
 * n & ~(UN-1) boundary provided UN is a multiple of 4.
 */
unsigned char *TEMPLATE2(vbzenc, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) {
  uint_t        *ip = in, v;
  uint_t        *unrolled_end = in + (n & ~(UN-1));
  unsigned char *op = out;

  #define VBZE { uint_t cur = *ip++;                                                                         \
    v = TEMPLATE2(zigzagenc, USIZE)((TEMPLATE3(int, USIZE, _t))cur-(TEMPLATE3(int, USIZE, _t))start);      \
    start = cur; TEMPLATE2(_vbput, USIZE)(op, v, ;); }

  while(ip != unrolled_end) {
    VBZE; VBZE; VBZE; VBZE;
  }
  while(ip != in+n)
    VBZE;
  return op;
}
#undef VBZE
/*
 * vbzdec<USIZE>: zigzag-delta decoder, counterpart of vbzenc.
 *
 *   in     encoded input
 *   n      number of integers to produce
 *   out    destination array, must have room for n elements
 *   start  value preceding the first element
 *
 * Every code read with _vbget is zigzag-decoded and added to the running
 * value, which is then stored. Returns the input pointer after the last
 * value read.
 */
unsigned char *TEMPLATE2(vbzdec, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) {
  uint_t  x;
  uint_t *op = out;
  uint_t *blk_end = out + (n & ~(UN-1));

  #define VBZD { TEMPLATE2(_vbget, USIZE)(in, x, ;); start += TEMPLATE2(zigzagdec, USIZE)(x); *op++ = start; }

  while(op != blk_end) {
    VBZD; VBZD; VBZD; VBZD;
      #if UN > 4
    VBZD; VBZD; VBZD; VBZD;
      #endif
    PREFETCH(in+16*USIZE, 0);
  }
  while(op != out+n)
    VBZD;
  return in;
}
@ -221,47 +221,47 @@ uint_t TEMPLATE2(vbzgetx, USIZE)(unsigned char *__restrict in, unsigned idx, ui
uint_t x; uint_t x;
for(ip = in,i = 0; i <= idx; i++) { for(ip = in,i = 0; i <= idx; i++) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
start += x+VDELTA; start += x+VDELTA;
} }
return start; return start;
} }
/*
 * vbzgeteq<USIZE>: scan a zigzag-delta stream for the first element equal
 * to `key`.
 *
 *   in     in/out: on entry the read position, on return the position after
 *          the last code that was read
 *   n      total number of elements in the block
 *   idx    index of the element at *in (scan covers idx .. n-1)
 *   key    value searched for
 *   start  reconstructed value preceding element idx
 *
 * Returns the index of the matching element, or n when none matched.
 *
 * The stream holds zigzag-coded differences (as written by vbzenc), so each
 * code is passed through zigzagdec before being accumulated, exactly as
 * vbzdec does. Adding the raw code would reconstruct wrong values whenever a
 * difference is negative or non-zero.
 */
unsigned TEMPLATE2(vbzgeteq, USIZE)(unsigned char **__restrict in, unsigned n, unsigned idx, uint_t key, uint_t start ) {
  unsigned       i;
  unsigned char *ip = *in;
  uint_t         x;

  for(i = idx; i < n; i++) {
    TEMPLATE2(_vbget, USIZE)(ip, x, ;);
    start += TEMPLATE2(zigzagdec, USIZE)(x);
    if(start == key)
      break;
  }
  *in = ip;
  return i;
}
/*
 * vbxenc<USIZE>: XOR-delta encoder.
 *
 *   in     n input integers
 *   out    output buffer
 *   start  value preceding in[0]
 *
 * Each element is XOR-ed with its predecessor and the result is written with
 * _vbput. OVERFLOWE is not applied in this variant. Returns the pointer just
 * past the written bytes.
 *
 * The unrolled loop consumes four values per pass; it ends exactly on the
 * n & ~(UN-1) boundary provided UN is a multiple of 4.
 */
unsigned char *TEMPLATE2(vbxenc, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) {
  uint_t        *ip = in, v;
  uint_t        *unrolled_end = in + (n & ~(UN-1));
  unsigned char *op = out;

  #define VBXE { v = (*ip)^start; start=*ip++; TEMPLATE2(_vbput, USIZE)(op, v, ;); }

  while(ip != unrolled_end) {
    VBXE; VBXE; VBXE; VBXE;
  }
  while(ip != in+n)
    VBXE;
  return op;
}
/* Release the helper defined by this function (VBXE, not VBZE). */
#undef VBXE
/*
 * vbxdec<USIZE>: XOR-delta decoder, counterpart of vbxenc.
 *
 *   in     encoded input
 *   n      number of integers to produce
 *   out    destination array, must have room for n elements
 *   start  value preceding the first element
 *
 * Every code read with _vbget is XOR-ed into the running value, which is
 * then stored. Returns the input pointer after the last value read.
 */
unsigned char *TEMPLATE2(vbxdec, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) {
  uint_t  x;
  uint_t *op = out;
  uint_t *blk_end = out + (n & ~(UN-1));

  #define VBXD { TEMPLATE2(_vbget, USIZE)(in, x, ;); start ^= x; *op++ = start; }

  while(op != blk_end) {
    VBXD; VBXD; VBXD; VBXD;
      #if UN > 4
    VBXD; VBXD; VBXD; VBXD;
      #endif
    PREFETCH(in+16*USIZE, 0);
  }
  while(op != out+n)
    VBXD;
  return in;
}
@ -273,19 +273,19 @@ uint_t TEMPLATE2(vbxgetx, USIZE)(unsigned char *__restrict in, unsigned idx, ui
uint_t x; uint_t x;
for(ip = in,i = 0; i <= idx; i++) { for(ip = in,i = 0; i <= idx; i++) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
start ^= x; start ^= x;
} }
return start; return start;
} }
unsigned TEMPLATE2(vbxgeteq, USIZE)(unsigned char **__restrict in, unsigned n, unsigned idx, uint_t key, uint_t start ) { unsigned TEMPLATE2(vbxgeteq, USIZE)(unsigned char **__restrict in, unsigned n, unsigned idx, uint_t key, uint_t start ) {
unsigned i; unsigned i;
unsigned char *ip; unsigned char *ip;
uint_t x; uint_t x;
for(ip = *in,i=idx; i < n; i++) { for(ip = *in,i=idx; i < n; i++) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
if((start ^= x) == key) if((start ^= x) == key)
break; break;
} }
*in = ip; *in = ip;
@ -294,18 +294,18 @@ unsigned TEMPLATE2(vbxgeteq, USIZE)(unsigned char **__restrict in, unsigned n, u
#endif #endif
unsigned char *TEMPLATE2(VBDENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { unsigned char *TEMPLATE2(VBDENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) {
unsigned char *op = out; unsigned char *op = out;
uint_t *ip, b = 0,v; uint_t *ip, b = 0,v;
if(!n) return out; if(!n) return out;
#if USIZE == 64 #if USIZE == 64
#define VB_MX 255 #define VB_MX 255
#else #else
#define VB_MX VB_MAX #define VB_MX VB_MAX
#endif #endif
#define VBDE { v = ip[0]-start-VDELTA; start = *ip++; TEMPLATE2(_vbput, USIZE)(op, v, ;); b |= (v /*^ x*/); } #define VBDE { v = ip[0]-start-VDELTA; start = *ip++; TEMPLATE2(_vbput, USIZE)(op, v, ;); b |= (v /*^ x*/); }
for(ip = in; ip != in + (n&~(UN-1)); ) { VBDE; VBDE; VBDE; VBDE; for(ip = in; ip != in + (n&~(UN-1)); ) { VBDE; VBDE; VBDE; VBDE;
#if UN > 4 #if UN > 4
VBDE; VBDE; VBDE; VBDE; VBDE; VBDE; VBDE; VBDE;
#endif #endif
} }
while(ip != in+n) VBDE; while(ip != in+n) VBDE;
@ -317,50 +317,50 @@ unsigned char *TEMPLATE2(VBDENC, USIZE)(uint_t *__restrict in, unsigned n, unsig
} }
#undef VBDE #undef VBDE
unsigned char *TEMPLATE2(VBDDEC, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) { unsigned char *TEMPLATE2(VBDDEC, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) {
uint_t x,*op; uint_t x,*op;
if(!n) return in; if(!n) return in;
#if USIZE < 64 #if USIZE < 64
OVERFLOWD(in,n,out,VB_MAX-1); OVERFLOWD(in,n,out,VB_MAX-1);
#endif #endif
if(in[0] == VB_MX) { if(in[0] == VB_MX) {
in++; in++;
#if (defined(__SSE2__) || defined(__ARM_NEON)) && USIZE == 32 #if (defined(__SSE2__) || defined(__ARM_NEON)) && USIZE == 32
#if VDELTA == 0 #if VDELTA == 0
if(n) TEMPLATE2(BITZERO, USIZE)(out, n, start); if(n) TEMPLATE2(BITZERO, USIZE)(out, n, start);
#else #else
if(n) TEMPLATE2(BITDIZERO,USIZE)(out, n, start, VDELTA); if(n) TEMPLATE2(BITDIZERO,USIZE)(out, n, start, VDELTA);
#endif #endif
#else #else
#if VDELTA == 0 #if VDELTA == 0
for(x = 0; x < n; x++) out[x] = start; for(x = 0; x < n; x++) out[x] = start;
#else #else
for(x = 0; x < n; x++) out[x] = start+x*VDELTA; for(x = 0; x < n; x++) out[x] = start+x*VDELTA;
#endif #endif
#endif #endif
return in; return in;
} }
#if 0 //USIZE < 64 #if 0 //USIZE < 64
else if(in[0] == VB_MAX-2) { in++; else if(in[0] == VB_MAX-2) { in++;
uint_t z; uint_t z;
TEMPLATE2(_vbget, USIZE)(in, z, ;); TEMPLATE2(_vbget, USIZE)(in, z, ;);
#if VDELTA == 0 #if VDELTA == 0
for(x = 0; x < n; x++) out[x] = start+z; for(x = 0; x < n; x++) out[x] = start+z;
#else #else
for(x = 0; x < n; x++) out[x] = start+x+z; for(x = 0; x < n; x++) out[x] = start+x+z;
#endif #endif
return in; return in;
} }
#endif #endif
#define VBDD(i) { TEMPLATE2(_vbget, USIZE)(in, x, x+=VDELTA); op[i] = (start += x); } #define VBDD(i) { TEMPLATE2(_vbget, USIZE)(in, x, x+=VDELTA); op[i] = (start += x); }
for(op = out; op != out+(n&~(UN-1)); op+=UN) { for(op = out; op != out+(n&~(UN-1)); op+=UN) {
VBDD(0); VBDD(1); VBDD(2); VBDD(3); VBDD(0); VBDD(1); VBDD(2); VBDD(3);
#if UN > 4 #if UN > 4
VBDD(4); VBDD(5); VBDD(6); VBDD(7); VBDD(4); VBDD(5); VBDD(6); VBDD(7);
#endif #endif
PREFETCH(in+16*USIZE, 0); PREFETCH(in+16*USIZE, 0);
} }
for(;op != out+n;op++) VBDD(0); for(;op != out+n;op++) VBDD(0);
return in; return in;
@ -373,18 +373,18 @@ uint_t TEMPLATE2(VBDGETX, USIZE)(unsigned char *__restrict in, unsigned idx, ui
uint_t x; uint_t x;
#if USIZE > 64 #if USIZE > 64
unsigned long long u; unsigned long long u;
_vbget64(in, u, ;); x = u>>1; start += x+VDELTA; _vbget64(in, u, ;); x = u>>1; start += x+VDELTA;
if(u & 1) return start + VDELTA; if(u & 1) return start + VDELTA;
#endif #endif
for(ip = in; i <= idx; i++) { for(ip = in; i <= idx; i++) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
start += x+VDELTA; start += x+VDELTA;
} }
return start; return start;
} }
unsigned TEMPLATE2(VBDGETGEQ, USIZE)(unsigned char **__restrict in, unsigned n, unsigned idx, uint_t *key, uint_t start ) { unsigned TEMPLATE2(VBDGETGEQ, USIZE)(unsigned char **__restrict in, unsigned n, unsigned idx, uint_t *key, uint_t start ) {
unsigned i=0; unsigned i=0;
unsigned char *ip=*in; unsigned char *ip=*in;
uint_t x; uint_t x;
@ -392,16 +392,16 @@ unsigned TEMPLATE2(VBDGETGEQ, USIZE)(unsigned char **__restrict in, unsigned n,
if(!idx) { if(!idx) {
unsigned long long u; _vbget64(ip, u, ;); x = u>>1; start += x+VDELTA; unsigned long long u; _vbget64(ip, u, ;); x = u>>1; start += x+VDELTA;
if((u & 1) && start == *key) { *in = ip; return 0; } if((u & 1) && start == *key) { *in = ip; return 0; }
i++; i++;
} }
#endif #endif
for(ip = *in; i < n; i++) { for(ip = *in; i < n; i++) {
TEMPLATE2(_vbget, USIZE)(ip, x, ;); TEMPLATE2(_vbget, USIZE)(ip, x, ;);
if((start += x+VDELTA) == *key) if((start += x+VDELTA) == *key)
break; break;
} }
*in = ip; *in = ip;
return i; return i;
} }
#undef uint_t #undef uint_t
#endif #endif