From e6f07bb537fd656776d281a2b053a1ca48e9f0b4 Mon Sep 17 00:00:00 2001 From: x Date: Sat, 21 Dec 2019 14:06:33 +0100 Subject: [PATCH] TurboPFor: Elias fano encode/decode --- eliasfano.c | 76 ++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/eliasfano.c b/eliasfano.c index 4ba7346..e47af1e 100644 --- a/eliasfano.c +++ b/eliasfano.c @@ -1,7 +1,7 @@ /** Copyright (C) powturbo 2013-2019 GPL v2 License - + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -25,9 +25,9 @@ // eliasfano.c - "Integer Compression" Elias Fano #ifndef USIZE #include -#pragma warning( disable : 4005) -#pragma warning( disable : 4090) -#pragma warning( disable : 4068) +#pragma warning( disable : 4005) +#pragma warning( disable : 4090) +#pragma warning( disable : 4068) #include "conf.h" #include "bitpack.h" @@ -42,13 +42,13 @@ #define bslr32(x) _blsr_u32(x) #define bslr64(x) _blsr_u64(x) #else -//static inline unsigned long long blsr(unsigned long long x) { return x & (x - 1); } +//static inline unsigned long long blsr(unsigned long long x) { return x & (x - 1); } #define blsr32(_x_) ((_x_) & ((_x_) - 1)) #define blsr64(_x_) ((_x_) & ((_x_) - 1)) #endif #define blsr8(_x_) blsr32(_x_) #define blsr16(_x_) blsr32(_x_) - + #define EFE(__x,__i,__start) ((__x[__i] - __start)-(__i)*EF_INC) #define BITPACK bitpack @@ -112,7 +112,7 @@ #include "eliasfano.c" #endif - + #ifdef __AVX2__ #define VSIZE 256 #define BITPACK bitpack256v @@ -131,22 +131,22 @@ #else //--------------------------------------------- implementation --------------------------------------------------------------- #define uint_t TEMPLATE3(uint, USIZE, _t) -#pragma clang diagnostic push +#pragma clang diagnostic push #pragma clang diagnostic ignored "-Wparentheses" -unsigned char *TEMPLATE2(EFANOENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { - uint_t *ip, e,x,hl,i; +unsigned char *TEMPLATE2(EFANOENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { + uint_t *ip, e,x,hl,i; unsigned char *op; unsigned lb; - uint_t _pa[1024+64],*pa=_pa; - if(!n) return out; + uint_t _pa[1024+64],*pa=_pa; + if(!n) return out; if(n > 1024) pa = malloc(sizeof(pa[0])*(n+64)); if(!pa) die("efanoenc:malloc error size=%d ", n); e = EFE(in,n-1,start); - if(!e) { out[0] = 0; if(pa != _pa) free(pa);return out+1; } - - lb = TEMPLATE2(bsr, USIZE)(e/n); - x = ((uint_t)1 << lb)-1; hl = PAD8((e>>lb)+n); - + if(!e) { out[0] = 0; if(pa != _pa) free(pa);return out+1; } + + lb = TEMPLATE2(bsr, USIZE)(e/n); + x = ((uint_t)1 << lb)-1; hl = PAD8((e>>lb)+n); + for(i = 0; i != n&~3;) { pa[i] = EFE(in,i,start) & x; ++i; pa[i] = EFE(in,i,start) & x; ++i; @@ -154,9 +154,9 @@ unsigned char *TEMPLATE2(EFANOENC, USIZE)(uint_t *__restrict in, unsigned n, uns pa[i] = EFE(in,i,start) & x; ++i; } while(i < n) pa[i] = EFE(in,i,start) & x, ++i; - *out = lb+1; + *out = lb+1; op = TEMPLATE2(BITPACK,USIZE)(pa, n, out+1, lb); - + memset(op, 0, hl); for(i = 0; i != n&~3; ) { x = i + (EFE(in,i,start) >> lb), op[x >> 3] |= (uint_t)1 << (x & 7); ++i; @@ -170,38 +170,38 @@ unsigned char *TEMPLATE2(EFANOENC, USIZE)(uint_t *__restrict in, unsigned n, uns } unsigned char *TEMPLATE2(EFANODEC, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) { - unsigned char *ip = in; - uint_t i,j,lb = *ip++; - uint64_t b,x; - if(!n) - return in; - - if(!lb) { + unsigned char *ip = in; + uint_t i,j,lb = *ip++; + uint64_t b,x; + if(!n) + return in; + + if(!lb) { #if (defined(__SSE2__) || defined(__ARM_NEON)) && USIZE == 32 #if EF_INC == 1 BITFORZERO32(out, n, start, 1); #else BITZERO32( out, n, start); #endif - #else + #else BITFORSET_(out, n, start, EF_INC); #endif - return ip; + return ip; } - + ip = TEMPLATE2(BITUNPACK,USIZE)(ip, n, out, --lb); #define EFD(i) if(!b) break; out[i] += ((uint_t)(j+ctz64(b)-i) << lb) + start+i*EF_INC; b = blsr64(b); ++i; - - for(i=j=0;; j += sizeof(uint64_t)*8) { //PREFETCH(ip+256,0); - for(b = ctou64(ip+(j>>3)); ; ) { - EFD(i); EFD(i); EFD(i); EFD(i); - if(!b) break; out[i] += ((uint_t)(j+ctz64(b)-i) << lb) + start+i*EF_INC; - if(unlikely(++i >= n)) - goto e; + + for(i=j=0;; j += sizeof(uint64_t)*8) { //PREFETCH(ip+256,0); + for(b = ctou64(ip+(j>>3)); ; ) { + EFD(i); EFD(i); EFD(i); EFD(i); + if(!b) break; out[i] += ((uint_t)(j+ctz64(b)-i) << lb) + start+i*EF_INC; + if(unlikely(++i >= n)) + goto e; b = blsr64(b); - } + } } - e:return ip + PAD8((EFE(out,n-1,start)>>lb)+n); + e:return ip + PAD8((EFE(out,n-1,start)>>lb)+n); } #pragma clang diagnostic pop