From a2b4366f935c5899b8b50305cdf33338ff37ed57 Mon Sep 17 00:00:00 2001 From: powturbo Date: Tue, 16 Jun 2015 17:13:15 +0200 Subject: [PATCH] BitUtil: delta, zigzag, bitsnum,... --- bitutil.c | 211 +++++++++++++++++++++++------------------------------- 1 file changed, 91 insertions(+), 120 deletions(-) diff --git a/bitutil.c b/bitutil.c index 1880014..ec7de21 100644 --- a/bitutil.c +++ b/bitutil.c @@ -21,129 +21,100 @@ - twitter : https://twitter.com/powturbo - email : powturbo [_AT_] gmail [_DOT_] com **/ -// bitutil.h - "Integer Compression" -#include "conf.h" -#include "bitutil.h" +// bitutil.h - "Integer Compression" +#include -#define BITDELTA(__p,__n, __inc, __start, __act) {\ - typeof(__p[0]) _x, *_p;\ - for(_p = __p; _p != __p+(__n&~(4-1)); ) {\ - _x = (*_p)-__start-__inc; __start = *_p++; __act;\ - _x = (*_p)-__start-__inc; __start = *_p++; __act;\ - _x = (*_p)-__start-__inc; __start = *_p++; __act;\ - _x = (*_p)-__start-__inc; __start = *_p++; __act;\ - }\ - while(_p < __p+__n) { \ - _x = *_p-__start-__inc; __start = *_p++; __act;\ - }\ +#define _BITFORZERO(out, n, start, inc) do {\ + for(i = 0; i != (n&~3); ) {\ + out[i] = start+i*inc; i++;\ + out[i] = start+i*inc; i++;\ + out[i] = start+i*inc; i++;\ + out[i] = start+i*inc; i++;\ + }\ + while(i < n) out[i] = start+i*inc,++i;\ +} while(0) + +#define BITSIZE(__in, __n, __b, __usize) { typeof(__in[0]) *_ip;\ + for(__b=0,_ip = __in; _ip != __in+(__n&~(4-1)); )\ + __b |= *_ip++ | *_ip++ | *_ip++ | *_ip++;\ + while(_ip != __in+__n) __b |= *_ip++;\ + __b = TEMPLATE(bsr, __usize)(__b);\ } -#define BITUNDELTA(__p, __n, __start, __inc) { typeof(__p[0]) *_p;\ - for(_p = __p; _p != __p+(__n&~(4-1)); ) {\ - *_p = (__start += (*_p) + __inc); _p++;\ - *_p = (__start += (*_p) + __inc); _p++;\ - *_p = (__start += (*_p) + __inc); _p++;\ - *_p = (__start += (*_p) + __inc); _p++;\ - }\ - while(_p < __p+__n) { *_p = (__start += (*_p) + __inc); _p++; }\ + #ifdef __SSE2__ +#include + +#define DELTA128_32(__v, __sv) _mm_sub_epi32(__v, _mm_or_si128(_mm_srli_si128(__sv, 12), _mm_slli_si128(__v, 4))) + +#define SCAN128_32( __v, __sv) __v = _mm_add_epi32(__v, _mm_slli_si128(__v, 4)); __sv = _mm_add_epi32(_mm_shuffle_epi32(__sv, _MM_SHUFFLE(3, 3, 3, 3)), _mm_add_epi32(_mm_slli_si128(__v, 8), __v) ) +#define SCANI128_32(__v, __sv, __vi) SCAN128_32(__v, __sv); __sv = _mm_add_epi32(__sv, __vi) + +// SIMD Horizontal OR +#define HOR128_32(__v,__b) __v = _mm_or_si128(__v, _mm_srli_si128(__v, 8)); __v = _mm_or_si128(__v, _mm_srli_si128(__v, 4)); __b = (unsigned)_mm_cvtsi128_si32(__v) + +#define BITSIZE32(__in, __n, __b) { typeof(__in[0]) *_ip; __m128i v = _mm_setzero_si128();\ + for(_ip = __in; _ip != __in+(__n&~(4-1)); _ip+=4) v = _mm_or_si128(v, _mm_loadu_si128((__m128i*)_ip));\ + HOR128_32(v,__b);\ + while(_ip != __in+__n) __b |= *_ip++;\ + __b = bsr32(__b);\ } -unsigned bitdelta32(unsigned *in, unsigned n, unsigned *out, unsigned start, unsigned inc) { - #ifdef __SSE2__ - unsigned *ip,b,*op = out; - __m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(inc), dv; - for(ip = in; ip != in+(n&~(4-1)); ip += 4) { - __m128i iv = _mm_loadu_si128((__m128i *)ip); - bv = _mm_or_si128(bv, dv = _mm_sub_epi32(DELTA128_32(iv,sv),cv)); - sv = iv; - _mm_storeu_si128((__m128i *)op, dv); - op += 4; - } - start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12)); - HOR128_32(bv, b); - while(ip < in+n) { unsigned x = *ip-start-inc; start = *ip++; b |= x; *op++ = x; } - #else - typeof(in[0]) b = 0,*op = out; BITDELTA(in, n, inc, start, b |= _x;*op++ = _x); - #endif - return bsr32(b); +#define BITZERO32(out, n, start) do {\ + __m128i sv = _mm_set1_epi32(start), *ov = (__m128i *)(out), *ove = (__m128i *)(out + n);\ + do { _mm_storeu_si128(ov++, sv); } while(ov < ove); \ +} while(0) + +#define BITFORZERO32(out, n, start, inc) do {\ + __m128i sv = _mm_set1_epi32(start), *ov=(__m128i *)(out), *ove = (__m128i *)(out + n), cv = _mm_set_epi32(3*inc,2*inc,1*inc,0); \ + sv = _mm_add_epi32(sv, cv);\ + cv = _mm_set1_epi32(4);\ + do { _mm_storeu_si128(ov++, sv); sv = _mm_add_epi32(sv, cv); } while(ov < ove);\ +} while(0) + +#define BITDIZERO32(out, n, start, inc) do { __m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(3+inc,2+inc,1+inc,inc), *ov=(__m128i *)(out), *ove = (__m128i *)(out + n);\ + sv = _mm_add_epi32(sv, cv); cv = _mm_set1_epi32(4*inc); do { _mm_storeu_si128(ov++, sv), sv = _mm_add_epi32(sv, cv); } while(ov < ove);\ +} while(0) + + #else +#define BITSIZE32(__in, __n, __b) BITSIZE(__in, __n, __b, 32) +#define BITFORZERO32(out, n, start, inc) _BITFORZERO(out, n, start, inc) +#define BITZERO32(out, n, start) _BITFORZERO(out, n, start, 0) + #endif + +#define ZIGZAG( __in, __n, __mode, __out) { unsigned _v; for( __out[0]=__in[0],_v = __n-1; _v > 0; --_v) { int _z = ((int)__in[_v] - (int)__in[_v-1]) - __mode; __out[_v] = (_z << 1) ^ (_z >> 31); } } +#define ZIGZAGB(__in, __n, __mode, __b, __out) { unsigned _v; for(__b=0,__out[0]=__in[0],_v = __n-1; _v > 0; --_v) { int _z = ((int)__in[_v] - (int)__in[_v-1]) - __mode; __out[_v] = (_z << 1) ^ (_z >> 31); __b |= __out[_v]; } __b = bsr32(__b); } + +#define UNZIGZAG(__out, __n, __mode) { unsigned _v,_x;\ + for(_x = __out[0],_v = 1; _v <__n; _v++) { unsigned _z = __out[_v]; __out[_v] = (_x += (_z >> 1 ^ -(_z & 1)) + __mode); }\ +} + +#define DELTR( __in, __n, __mode, __out) { unsigned _v; for( __out[0]=__in[0],_v = 1; _v < __n; _v++) __out[_v] = (__in[_v] - __out[0]) - _v*__mode; } +#define DELTRB(__in, __n, __mode, __b, __out) { unsigned _v; for(__b=0,__out[0]=__in[0],_v = 1; _v < __n; _v++) __out[_v] = (__in[_v] - __out[0]) - _v*__mode, __b |= __out[_v]; __b = bsr32(__b); } + +#ifdef __cplusplus +extern "C" { +#endif + +// get maximum bit length of the elements in the integer array +unsigned bit32( unsigned *in, unsigned n); + +// transform sorted integer array to delta array. inc = increment +unsigned bitdelta32(unsigned *in, unsigned n, unsigned *out, unsigned start, unsigned inc); +unsigned bitdelta64(uint64_t *in, unsigned n, uint64_t *out, uint64_t start, unsigned inc); + +// get delta maximum bit length of the non decreasing integer array +unsigned bitd32( unsigned *in, unsigned n, unsigned start); + +// get delta maximum bit length of the non strictly decreasing integer array +unsigned bitd132(unsigned *in, unsigned n, unsigned start); + +void bitund32( unsigned *p, unsigned n, unsigned x); +void bitund64( uint64_t *p, unsigned n, uint64_t x); + +void bitundx32(unsigned *p, unsigned n, unsigned x, unsigned inc); +void bitundx64(uint64_t *p, unsigned n, uint64_t x, unsigned inc); + +void bitund132(unsigned *p, unsigned n, unsigned x); +#ifdef __cplusplus } - -unsigned bitdelta64(uint64_t *in, unsigned n, uint64_t *out, uint64_t start, unsigned inc) { - typeof(in[0]) b = 0,*op = out; BITDELTA(in, n, inc, start, b |= _x; *op++ = _x); - return bsr64(b); -} - -unsigned bit32(unsigned *in, unsigned n) { - typeof(in[0]) b; BITSIZE32(in, n, b); - return b; -} - -unsigned bitd32(unsigned *in, unsigned n, unsigned start) { - #ifdef __SSE2__ - unsigned *ip,b; __m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start); - for(ip = in; ip != in+(n&~(4-1)); ip += 4) { - __m128i iv = _mm_loadu_si128((__m128i *)ip); - bv = _mm_or_si128(bv, DELTA128_32(iv,sv)); - sv = iv; - } - - start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12)); - HOR128_32(bv, b); - while(ip < in+n) { - unsigned x = *ip-start; - start = *ip++; - b |= x; - } - #else - typeof(in[0]) b = 0; BITDELTA(in,n, 0, start, b |= _x); - #endif - return bsr32(b); -} - -unsigned bitd132(unsigned *in, unsigned n, unsigned start) { - #ifdef __SSE2__ - unsigned *ip,b; __m128i bv = _mm_setzero_si128(), sv = _mm_set1_epi32(start), cv = _mm_set1_epi32(1); - for(ip = in; ip != in+(n&~(4-1)); ip += 4) { - __m128i iv = _mm_loadu_si128((__m128i *)ip); - bv = _mm_or_si128(bv, _mm_sub_epi32(DELTA128_32(iv,sv),cv)); - sv = iv; - } - - start = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12)); - HOR128_32(bv, b); - while(ip < in+n) { - unsigned x = *ip-start-1; - start = *ip++; - b |= x; - } - #else - typeof(in[0]) b = 0; BITDELTA(in, n, 1, start, b |= _x); - #endif - return bsr32(b); -} - -void bitund32( unsigned *p, unsigned n, unsigned x) { BITUNDELTA(p, n, x, 0); } -void bitund64( uint64_t *p, unsigned n, uint64_t x) { BITUNDELTA(p, n, x, 0); } - -void bitund132(unsigned *p, unsigned n, unsigned x) { - #ifdef __SSE2__ - __m128i sv = _mm_set1_epi32(x), cv = _mm_set_epi32(4,3,2,1); - unsigned *ip; - for(ip = p; ip != p+(n&~(4-1)); ) { - __m128i v = _mm_loadu_si128((__m128i *)ip); - SCANI128_32(v, sv, cv); - _mm_storeu_si128((__m128i *)ip, sv); - ip += 4; - } - x = (unsigned)_mm_cvtsi128_si32(_mm_srli_si128(sv,12)); - while(ip < p+n) { - *ip = (x += (*ip) + 1); - ip++; - } - #else - BITUNDELTA(p, n, x, 1); - #endif -} - -void bitundx32(unsigned *p, unsigned n, unsigned x, unsigned inc) { BITUNDELTA(p, n, x, inc); } -void bitundx64(uint64_t *p, unsigned n, uint64_t x, unsigned inc) { BITUNDELTA(p, n, x, inc); } +#endif