From 28ef6a40c58f4dd285a22e76bad466c9dc664687 Mon Sep 17 00:00:00 2001 From: x Date: Wed, 7 Jun 2023 13:58:23 +0200 Subject: [PATCH] TurboPFor: Elias fano --- TurboPFor-Integer-Compression/lib/eliasfano.c | 218 ++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 TurboPFor-Integer-Compression/lib/eliasfano.c diff --git a/TurboPFor-Integer-Compression/lib/eliasfano.c b/TurboPFor-Integer-Compression/lib/eliasfano.c new file mode 100644 index 0000000..6cf42cf --- /dev/null +++ b/TurboPFor-Integer-Compression/lib/eliasfano.c @@ -0,0 +1,218 @@ +/** + Copyright (C) powturbo 2013-2023 + SPDX-License-Identifier: GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + - homepage : https://sites.google.com/site/powturbo/ + - github : https://github.com/powturbo + - twitter : https://twitter.com/powturbo + - email : powturbo [_AT_] gmail [_DOT_] com +**/ + +// eliasfano.c - "Integer Compression" Elias Fano +#ifndef USIZE +#include +#include + #ifdef __APPLE__ +#include + #else +#include + #endif +#include "include_/conf.h" +#include "include_/bitpack.h" +#include "include_/bitutil.h" +#include "include_/eliasfano.h" + +#include "include_/bitutil_.h" + +#pragma warning( disable : 4005) +#pragma warning( disable : 4090) +#pragma warning( disable : 4068) + + +#define PAD8(__x) ( (((__x)+8-1)/8) ) + + #ifdef __SSE42__ +#include +#define bslr32(x) _blsr_u32(x) +#define bslr64(x) _blsr_u64(x) + #else +//static inline unsigned long long blsr(unsigned long long x) { return x & (x - 1); } +#define blsr32(_x_) ((_x_) & ((_x_) - 1)) +#define blsr64(_x_) ((_x_) & ((_x_) - 1)) + #endif +#define blsr8(_x_) blsr32(_x_) +#define blsr16(_x_) blsr32(_x_) + +#define EFE(__x,__i,__start) ((__x[__i] - __start)-(__i)*EF_INC) + +#define BITPACK bitpack +#define BITUNPACK bitunpack +#define EF_INC 1 +#define EFANOENC efano1enc +#define EFANODEC efano1dec + +#define USIZE 32 +#include "eliasfano.c" +#undef USIZE + +/*#define USIZE 16 +#include "eliasfano.c" +#undef USIZE*/ + +#undef EF_INC +#undef EFANOENC +#undef EFANODEC + +//---------- +#define EF_INC 0 +#define EFANOENC efanoenc +#define EFANODEC efanodec + +#define USIZE 32 +#include "eliasfano.c" +#undef USIZE + +#define USIZE 64 +#include "eliasfano.c" +#undef USIZE + +/*#define USIZE 16 +#include "eliasfano.c" +#undef USIZE*/ + +#undef BITPACK +#undef BITUNPACK + +#undef EF_INC +#undef EFANOENC +#undef EFANODEC + +//---------------------- + #if defined(__SSE2__) || defined(__ARM_NEON) +#define VSIZE 128 + +#define BITPACK bitpack128v +#define BITUNPACK bitunpack128v +#define EF_INC 1 +#define EFANOENC efano1enc128v +#define EFANODEC efano1dec128v + +#define USIZE 32 +#include "eliasfano.c" +#undef EF_INC +#undef EFANOENC +#undef EFANODEC + +#define EF_INC 0 +#define EFANOENC efanoenc128v +#define EFANODEC efanodec128v +#include "eliasfano.c" + #endif + + #ifdef __AVX2__ +#define VSIZE 256 +#define BITPACK bitpack256v +#define BITUNPACK bitunpack256v +#define EF_INC 1 +#define EFANOENC efano1enc256v +#define EFANODEC efano1dec256v +#include "eliasfano.c" + +#define EF_INC 0 +#define EFANOENC efanoenc256v +#define EFANODEC efanodec256v +#include "eliasfano.c" + #endif + +#else //--------------------------------------------- implementation --------------------------------------------------------------- +#define uint_t T3(uint, USIZE, _t) + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wparentheses" + +unsigned char *T2(EFANOENC, USIZE)(uint_t *__restrict in, unsigned n, unsigned char *__restrict out, uint_t start) { + uint_t *ip, e,x,hl,i; + unsigned char *op; + unsigned lb; + uint_t _pa[1024+64],*pa=_pa; + if(!n) return out; + if(n > 1024) pa = malloc(sizeof(pa[0])*(n+64)); if(!pa) die("efanoenc:malloc error size=%d ", n); + e = EFE(in,n-1,start); + if(!e) { out[0] = 0; if(pa != _pa) free(pa);return out+1; } + + lb = T2(bsr, USIZE)(e/n); + x = ((uint_t)1 << lb)-1; hl = PAD8((e>>lb)+n); + + for(i = 0; i != n&~3;) { + pa[i] = EFE(in,i,start) & x; ++i; + pa[i] = EFE(in,i,start) & x; ++i; + pa[i] = EFE(in,i,start) & x; ++i; + pa[i] = EFE(in,i,start) & x; ++i; + } + while(i < n) pa[i] = EFE(in,i,start) & x, ++i; + *out = lb+1; + op = T2(BITPACK,USIZE)(pa, n, out+1, lb); + + memset(op, 0, hl); + for(i = 0; i != n&~3; ) { + x = i + (EFE(in,i,start) >> lb), op[x >> 3] |= (uint_t)1 << (x & 7); ++i; + x = i + (EFE(in,i,start) >> lb), op[x >> 3] |= (uint_t)1 << (x & 7); ++i; + x = i + (EFE(in,i,start) >> lb), op[x >> 3] |= (uint_t)1 << (x & 7); ++i; + x = i + (EFE(in,i,start) >> lb), op[x >> 3] |= (uint_t)1 << (x & 7); ++i; + } + while(i < n) x = i + (EFE(in,i,start) >> lb), op[x >> 3] |= (uint_t)1 << (x & 7),++i; + if(pa != _pa) free(pa); + return op+hl; +} + +unsigned char *T2(EFANODEC, USIZE)(unsigned char *__restrict in, unsigned n, uint_t *__restrict out, uint_t start) { + unsigned char *ip = in; + uint_t i,j,lb = *ip++; + uint64_t b,x; + if(!n) + return in; + + if(!lb) { + #if (defined(__SSE2__) || defined(__ARM_NEON)) && USIZE == 32 + #if EF_INC == 1 + BITFORZERO32(out, n, start, 1); + #else + BITZERO32( out, n, start); + #endif + #else + BITFORSET_(out, n, start, EF_INC); + #endif + return ip; + } + + ip = T2(BITUNPACK,USIZE)(ip, n, out, --lb); + #define EFD(i) if(!b) break; out[i] += ((uint_t)(j+ctz64(b)-i) << lb) + start+i*EF_INC; b = blsr64(b); ++i; + + for(i=j=0;; j += sizeof(uint64_t)*8) { //PREFETCH(ip+256,0); + for(b = ctou64(ip+(j>>3)); ; ) { + EFD(i); EFD(i); EFD(i); EFD(i); + if(!b) break; out[i] += ((uint_t)(j+ctz64(b)-i) << lb) + start+i*EF_INC; + if(unlikely(++i >= n)) + goto e; + b = blsr64(b); + } + } + e:return ip + PAD8((EFE(out,n-1,start)>>lb)+n); +} + +#pragma clang diagnostic pop +#endif