Files
TurboPFor-Integer-Compression/ext/simdcomp/src/simdcomputil.c
2015-01-05 23:13:49 +01:00

57 lines
1.6 KiB
C

#include "../include/simdcomputil.h"
// Lane-wise difference between each 32-bit value of curr and the value that
// precedes it in the stream. The predecessor of lane 0 is the top lane of
// prev; the predecessor of lane i (i = 1..3) is lane i-1 of curr.
// The subtraction wraps modulo 2^32 (_mm_sub_epi32).
//
// The always_inline attribute is a GCC-family extension, so it is only
// emitted when __GNUC__ is defined; other compilers still see "static inline".
#if defined(__GNUC__)
__attribute__((always_inline))
#endif
static inline __m128i Delta(__m128i curr, __m128i prev) {
    // Byte shifts act on the whole 128-bit register: shifting curr left by
    // 4 bytes moves every lane up by one (lane 0 becomes zero), and shifting
    // prev right by 12 bytes leaves only its top lane, now sitting in lane 0.
    const __m128i predecessors =
        _mm_or_si128(_mm_slli_si128(curr, 4), _mm_srli_si128(prev, 12));
    return _mm_sub_epi32(curr, predecessors);
}
// Bit width of v: the 1-based position of its most significant set bit,
// i.e. how many bits are needed to store v. Yields 0 when v is 0 and 32
// when the top bit is set.
uint32_t bits(const uint32_t v) {
    // Zero has no set bit; both bit-scan primitives below require v != 0.
    if (v == 0) {
        return 0;
    }
#ifdef _MSC_VER
    {
        unsigned long highest;  // 0-based index of the top set bit
        _BitScanReverse(&highest, v);
        return highest + 1;
    }
#else
    // Anything that is not Microsoft's compiler is assumed to be GCC-like.
    return 32 - __builtin_clz(v);
#endif
}
// Largest bit width found among the SIMDBlockSize consecutive integers
// starting at begin. All values are OR-ed together and the width of the
// combined word is returned: the highest set bit of the OR is the highest
// set bit of any input, so this equals the maximum of the individual widths.
// begin must point to at least SIMDBlockSize readable uint32_t values.
//
// "pure" is a GCC-family attribute, so it is only emitted when __GNUC__ is
// defined; other compilers simply compile the plain function.
#if defined(__GNUC__)
__attribute__ ((pure))
#endif
uint32_t maxbits(const uint32_t * begin) {
    const uint32_t * const end = begin + SIMDBlockSize;
    const uint32_t * k;
    uint32_t accumulator = 0;
    for (k = begin; k != end; ++k) {
        accumulator |= *k;
    }
    return bits(accumulator);
}
// Collapses a 128-bit accumulator to a single bit width: the four 32-bit
// lanes are spilled to memory, OR-ed together, and the result is handed to
// bits().
static uint32_t maxbitas32int(const __m128i accumulator) {
    uint32_t lanes[4];
    uint32_t merged = 0;
    int lane;
    // Unaligned store: the local array carries no 16-byte alignment guarantee.
    _mm_storeu_si128((__m128i *) lanes, accumulator);
    for (lane = 0; lane < 4; ++lane) {
        merged |= lanes[lane];
    }
    return bits(merged);
}
// Maximum bit width of the successive differences over one block of
// SIMDBlockSize integers (128 according to the original comment).
//
// initvalue: value treated as the predecessor of in[0], so the first
//            difference is in[0] - initvalue.
// in:        start of the block; read four integers at a time with unaligned
//            loads, so no particular alignment is required.
//
// Each difference is in[i] - in[i-1] computed modulo 2^32 (see Delta). The
// differences are OR-ed into a vector accumulator and the bit width of the
// combined value is returned.
uint32_t simdmaxbitsd1(uint32_t initvalue, const uint32_t * in) {
    const __m128i * pin = (const __m128i *)(in);
    __m128i oldvec = _mm_loadu_si128(pin);
    // Broadcasting initvalue puts it in the top lane, which is the only lane
    // of the "previous" vector that Delta reads. The cast spells out the
    // unsigned-to-int conversion the intrinsic's signature requires.
    __m128i accumulator = Delta(oldvec, _mm_set1_epi32((int) initvalue));
    uint32_t k;
    for (k = 1; 4 * k < SIMDBlockSize; ++k) {
        const __m128i newvec = _mm_loadu_si128(pin + k);
        accumulator = _mm_or_si128(accumulator, Delta(newvec, oldvec));
        oldvec = newvec;
    }
    return maxbitas32int(accumulator);
}