diff --git a/bitunpack.c b/bitunpack.c index e722b1e..9eb2af7 100644 --- a/bitunpack.c +++ b/bitunpack.c @@ -160,262 +160,262 @@ size_t bitnfunpack64( unsigned char *__restrict in, size_t n, uint64_t *__restri static inline __m128i _mm_cvtsi64_si128(__int64 a) { return _mm_loadl_epi64((__m128i*)&a); } #endif static ALIGNED(unsigned char, permv[256][8], 32) = { -0,0,0,0,0,0,0,0, -0,1,1,1,1,1,1,1, -1,0,1,1,1,1,1,1, -0,1,2,2,2,2,2,2, -1,1,0,1,1,1,1,1, -0,2,1,2,2,2,2,2, -2,0,1,2,2,2,2,2, -0,1,2,3,3,3,3,3, -1,1,1,0,1,1,1,1, -0,2,2,1,2,2,2,2, -2,0,2,1,2,2,2,2, -0,1,3,2,3,3,3,3, -2,2,0,1,2,2,2,2, -0,3,1,2,3,3,3,3, -3,0,1,2,3,3,3,3, -0,1,2,3,4,4,4,4, -1,1,1,1,0,1,1,1, -0,2,2,2,1,2,2,2, -2,0,2,2,1,2,2,2, -0,1,3,3,2,3,3,3, -2,2,0,2,1,2,2,2, -0,3,1,3,2,3,3,3, -3,0,1,3,2,3,3,3, -0,1,2,4,3,4,4,4, -2,2,2,0,1,2,2,2, -0,3,3,1,2,3,3,3, -3,0,3,1,2,3,3,3, -0,1,4,2,3,4,4,4, -3,3,0,1,2,3,3,3, -0,4,1,2,3,4,4,4, -4,0,1,2,3,4,4,4, -0,1,2,3,4,5,5,5, -1,1,1,1,1,0,1,1, -0,2,2,2,2,1,2,2, -2,0,2,2,2,1,2,2, -0,1,3,3,3,2,3,3, -2,2,0,2,2,1,2,2, -0,3,1,3,3,2,3,3, -3,0,1,3,3,2,3,3, -0,1,2,4,4,3,4,4, -2,2,2,0,2,1,2,2, -0,3,3,1,3,2,3,3, -3,0,3,1,3,2,3,3, -0,1,4,2,4,3,4,4, -3,3,0,1,3,2,3,3, -0,4,1,2,4,3,4,4, -4,0,1,2,4,3,4,4, -0,1,2,3,5,4,5,5, -2,2,2,2,0,1,2,2, -0,3,3,3,1,2,3,3, -3,0,3,3,1,2,3,3, -0,1,4,4,2,3,4,4, -3,3,0,3,1,2,3,3, -0,4,1,4,2,3,4,4, -4,0,1,4,2,3,4,4, -0,1,2,5,3,4,5,5, -3,3,3,0,1,2,3,3, -0,4,4,1,2,3,4,4, -4,0,4,1,2,3,4,4, -0,1,5,2,3,4,5,5, -4,4,0,1,2,3,4,4, -0,5,1,2,3,4,5,5, -5,0,1,2,3,4,5,5, -0,1,2,3,4,5,6,6, -1,1,1,1,1,1,0,1, -0,2,2,2,2,2,1,2, -2,0,2,2,2,2,1,2, -0,1,3,3,3,3,2,3, -2,2,0,2,2,2,1,2, -0,3,1,3,3,3,2,3, -3,0,1,3,3,3,2,3, -0,1,2,4,4,4,3,4, -2,2,2,0,2,2,1,2, -0,3,3,1,3,3,2,3, -3,0,3,1,3,3,2,3, -0,1,4,2,4,4,3,4, -3,3,0,1,3,3,2,3, -0,4,1,2,4,4,3,4, -4,0,1,2,4,4,3,4, -0,1,2,3,5,5,4,5, -2,2,2,2,0,2,1,2, -0,3,3,3,1,3,2,3, -3,0,3,3,1,3,2,3, -0,1,4,4,2,4,3,4, -3,3,0,3,1,3,2,3, -0,4,1,4,2,4,3,4, -4,0,1,4,2,4,3,4, -0,1,2,5,3,5,4,5, -3,3,3,0,1,3,2,3, -0,4,4,1,2,4,3,4, -4,0,4,1,2,4,3,4, -0,1,5,2,3,5,4,5, -4,4,0,1,2,4,3,4, -0,5,1,2,3,5,4,5, -5,0,1,2,3,5,4,5, -0,1,2,3,4,6,5,6, -2,2,2,2,2,0,1,2, -0,3,3,3,3,1,2,3, -3,0,3,3,3,1,2,3, -0,1,4,4,4,2,3,4, -3,3,0,3,3,1,2,3, -0,4,1,4,4,2,3,4, -4,0,1,4,4,2,3,4, -0,1,2,5,5,3,4,5, -3,3,3,0,3,1,2,3, -0,4,4,1,4,2,3,4, -4,0,4,1,4,2,3,4, -0,1,5,2,5,3,4,5, -4,4,0,1,4,2,3,4, -0,5,1,2,5,3,4,5, -5,0,1,2,5,3,4,5, -0,1,2,3,6,4,5,6, -3,3,3,3,0,1,2,3, -0,4,4,4,1,2,3,4, -4,0,4,4,1,2,3,4, -0,1,5,5,2,3,4,5, -4,4,0,4,1,2,3,4, -0,5,1,5,2,3,4,5, -5,0,1,5,2,3,4,5, -0,1,2,6,3,4,5,6, -4,4,4,0,1,2,3,4, -0,5,5,1,2,3,4,5, -5,0,5,1,2,3,4,5, -0,1,6,2,3,4,5,6, -5,5,0,1,2,3,4,5, -0,6,1,2,3,4,5,6, -6,0,1,2,3,4,5,6, -0,1,2,3,4,5,6,7, -1,1,1,1,1,1,1,0, -0,2,2,2,2,2,2,1, -2,0,2,2,2,2,2,1, -0,1,3,3,3,3,3,2, -2,2,0,2,2,2,2,1, -0,3,1,3,3,3,3,2, -3,0,1,3,3,3,3,2, -0,1,2,4,4,4,4,3, -2,2,2,0,2,2,2,1, -0,3,3,1,3,3,3,2, -3,0,3,1,3,3,3,2, -0,1,4,2,4,4,4,3, -3,3,0,1,3,3,3,2, -0,4,1,2,4,4,4,3, -4,0,1,2,4,4,4,3, -0,1,2,3,5,5,5,4, -2,2,2,2,0,2,2,1, -0,3,3,3,1,3,3,2, -3,0,3,3,1,3,3,2, -0,1,4,4,2,4,4,3, -3,3,0,3,1,3,3,2, -0,4,1,4,2,4,4,3, -4,0,1,4,2,4,4,3, -0,1,2,5,3,5,5,4, -3,3,3,0,1,3,3,2, -0,4,4,1,2,4,4,3, -4,0,4,1,2,4,4,3, -0,1,5,2,3,5,5,4, -4,4,0,1,2,4,4,3, -0,5,1,2,3,5,5,4, -5,0,1,2,3,5,5,4, -0,1,2,3,4,6,6,5, -2,2,2,2,2,0,2,1, -0,3,3,3,3,1,3,2, -3,0,3,3,3,1,3,2, -0,1,4,4,4,2,4,3, -3,3,0,3,3,1,3,2, -0,4,1,4,4,2,4,3, -4,0,1,4,4,2,4,3, -0,1,2,5,5,3,5,4, -3,3,3,0,3,1,3,2, -0,4,4,1,4,2,4,3, -4,0,4,1,4,2,4,3, -0,1,5,2,5,3,5,4, -4,4,0,1,4,2,4,3, -0,5,1,2,5,3,5,4, -5,0,1,2,5,3,5,4, -0,1,2,3,6,4,6,5, -3,3,3,3,0,1,3,2, -0,4,4,4,1,2,4,3, -4,0,4,4,1,2,4,3, -0,1,5,5,2,3,5,4, -4,4,0,4,1,2,4,3, -0,5,1,5,2,3,5,4, -5,0,1,5,2,3,5,4, -0,1,2,6,3,4,6,5, -4,4,4,0,1,2,4,3, -0,5,5,1,2,3,5,4, -5,0,5,1,2,3,5,4, -0,1,6,2,3,4,6,5, -5,5,0,1,2,3,5,4, -0,6,1,2,3,4,6,5, -6,0,1,2,3,4,6,5, -0,1,2,3,4,5,7,6, -2,2,2,2,2,2,0,1, -0,3,3,3,3,3,1,2, -3,0,3,3,3,3,1,2, -0,1,4,4,4,4,2,3, -3,3,0,3,3,3,1,2, -0,4,1,4,4,4,2,3, -4,0,1,4,4,4,2,3, -0,1,2,5,5,5,3,4, -3,3,3,0,3,3,1,2, -0,4,4,1,4,4,2,3, -4,0,4,1,4,4,2,3, -0,1,5,2,5,5,3,4, -4,4,0,1,4,4,2,3, -0,5,1,2,5,5,3,4, -5,0,1,2,5,5,3,4, -0,1,2,3,6,6,4,5, -3,3,3,3,0,3,1,2, -0,4,4,4,1,4,2,3, -4,0,4,4,1,4,2,3, -0,1,5,5,2,5,3,4, -4,4,0,4,1,4,2,3, -0,5,1,5,2,5,3,4, -5,0,1,5,2,5,3,4, -0,1,2,6,3,6,4,5, -4,4,4,0,1,4,2,3, -0,5,5,1,2,5,3,4, -5,0,5,1,2,5,3,4, -0,1,6,2,3,6,4,5, -5,5,0,1,2,5,3,4, -0,6,1,2,3,6,4,5, -6,0,1,2,3,6,4,5, -0,1,2,3,4,7,5,6, -3,3,3,3,3,0,1,2, -0,4,4,4,4,1,2,3, -4,0,4,4,4,1,2,3, -0,1,5,5,5,2,3,4, -4,4,0,4,4,1,2,3, -0,5,1,5,5,2,3,4, -5,0,1,5,5,2,3,4, -0,1,2,6,6,3,4,5, -4,4,4,0,4,1,2,3, -0,5,5,1,5,2,3,4, -5,0,5,1,5,2,3,4, -0,1,6,2,6,3,4,5, -5,5,0,1,5,2,3,4, -0,6,1,2,6,3,4,5, -6,0,1,2,6,3,4,5, -0,1,2,3,7,4,5,6, -4,4,4,4,0,1,2,3, -0,5,5,5,1,2,3,4, -5,0,5,5,1,2,3,4, -0,1,6,6,2,3,4,5, -5,5,0,5,1,2,3,4, -0,6,1,6,2,3,4,5, -6,0,1,6,2,3,4,5, -0,1,2,7,3,4,5,6, -5,5,5,0,1,2,3,4, -0,6,6,1,2,3,4,5, -6,0,6,1,2,3,4,5, -0,1,7,2,3,4,5,6, -6,6,0,1,2,3,4,5, -0,7,1,2,3,4,5,6, -7,0,1,2,3,4,5,6, -0,1,2,3,4,5,6,7 +{0,0,0,0,0,0,0,0}, +{0,1,1,1,1,1,1,1}, +{1,0,1,1,1,1,1,1}, +{0,1,2,2,2,2,2,2}, +{1,1,0,1,1,1,1,1}, +{0,2,1,2,2,2,2,2}, +{2,0,1,2,2,2,2,2}, +{0,1,2,3,3,3,3,3}, +{1,1,1,0,1,1,1,1}, +{0,2,2,1,2,2,2,2}, +{2,0,2,1,2,2,2,2}, +{0,1,3,2,3,3,3,3}, +{2,2,0,1,2,2,2,2}, +{0,3,1,2,3,3,3,3}, +{3,0,1,2,3,3,3,3}, +{0,1,2,3,4,4,4,4}, +{1,1,1,1,0,1,1,1}, +{0,2,2,2,1,2,2,2}, +{2,0,2,2,1,2,2,2}, +{0,1,3,3,2,3,3,3}, +{2,2,0,2,1,2,2,2}, +{0,3,1,3,2,3,3,3}, +{3,0,1,3,2,3,3,3}, +{0,1,2,4,3,4,4,4}, +{2,2,2,0,1,2,2,2}, +{0,3,3,1,2,3,3,3}, +{3,0,3,1,2,3,3,3}, +{0,1,4,2,3,4,4,4}, +{3,3,0,1,2,3,3,3}, +{0,4,1,2,3,4,4,4}, +{4,0,1,2,3,4,4,4}, +{0,1,2,3,4,5,5,5}, +{1,1,1,1,1,0,1,1}, +{0,2,2,2,2,1,2,2}, +{2,0,2,2,2,1,2,2}, +{0,1,3,3,3,2,3,3}, +{2,2,0,2,2,1,2,2}, +{0,3,1,3,3,2,3,3}, +{3,0,1,3,3,2,3,3}, +{0,1,2,4,4,3,4,4}, +{2,2,2,0,2,1,2,2}, +{0,3,3,1,3,2,3,3}, +{3,0,3,1,3,2,3,3}, +{0,1,4,2,4,3,4,4}, +{3,3,0,1,3,2,3,3}, +{0,4,1,2,4,3,4,4}, +{4,0,1,2,4,3,4,4}, +{0,1,2,3,5,4,5,5}, +{2,2,2,2,0,1,2,2}, +{0,3,3,3,1,2,3,3}, +{3,0,3,3,1,2,3,3}, +{0,1,4,4,2,3,4,4}, +{3,3,0,3,1,2,3,3}, +{0,4,1,4,2,3,4,4}, +{4,0,1,4,2,3,4,4}, +{0,1,2,5,3,4,5,5}, +{3,3,3,0,1,2,3,3}, +{0,4,4,1,2,3,4,4}, +{4,0,4,1,2,3,4,4}, +{0,1,5,2,3,4,5,5}, +{4,4,0,1,2,3,4,4}, +{0,5,1,2,3,4,5,5}, +{5,0,1,2,3,4,5,5}, +{0,1,2,3,4,5,6,6}, +{1,1,1,1,1,1,0,1}, +{0,2,2,2,2,2,1,2}, +{2,0,2,2,2,2,1,2}, +{0,1,3,3,3,3,2,3}, +{2,2,0,2,2,2,1,2}, +{0,3,1,3,3,3,2,3}, +{3,0,1,3,3,3,2,3}, +{0,1,2,4,4,4,3,4}, +{2,2,2,0,2,2,1,2}, +{0,3,3,1,3,3,2,3}, +{3,0,3,1,3,3,2,3}, +{0,1,4,2,4,4,3,4}, +{3,3,0,1,3,3,2,3}, +{0,4,1,2,4,4,3,4}, +{4,0,1,2,4,4,3,4}, +{0,1,2,3,5,5,4,5}, +{2,2,2,2,0,2,1,2}, +{0,3,3,3,1,3,2,3}, +{3,0,3,3,1,3,2,3}, +{0,1,4,4,2,4,3,4}, +{3,3,0,3,1,3,2,3}, +{0,4,1,4,2,4,3,4}, +{4,0,1,4,2,4,3,4}, +{0,1,2,5,3,5,4,5}, +{3,3,3,0,1,3,2,3}, +{0,4,4,1,2,4,3,4}, +{4,0,4,1,2,4,3,4}, +{0,1,5,2,3,5,4,5}, +{4,4,0,1,2,4,3,4}, +{0,5,1,2,3,5,4,5}, +{5,0,1,2,3,5,4,5}, +{0,1,2,3,4,6,5,6}, +{2,2,2,2,2,0,1,2}, +{0,3,3,3,3,1,2,3}, +{3,0,3,3,3,1,2,3}, +{0,1,4,4,4,2,3,4}, +{3,3,0,3,3,1,2,3}, +{0,4,1,4,4,2,3,4}, +{4,0,1,4,4,2,3,4}, +{0,1,2,5,5,3,4,5}, +{3,3,3,0,3,1,2,3}, +{0,4,4,1,4,2,3,4}, +{4,0,4,1,4,2,3,4}, +{0,1,5,2,5,3,4,5}, +{4,4,0,1,4,2,3,4}, +{0,5,1,2,5,3,4,5}, +{5,0,1,2,5,3,4,5}, +{0,1,2,3,6,4,5,6}, +{3,3,3,3,0,1,2,3}, +{0,4,4,4,1,2,3,4}, +{4,0,4,4,1,2,3,4}, +{0,1,5,5,2,3,4,5}, +{4,4,0,4,1,2,3,4}, +{0,5,1,5,2,3,4,5}, +{5,0,1,5,2,3,4,5}, +{0,1,2,6,3,4,5,6}, +{4,4,4,0,1,2,3,4}, +{0,5,5,1,2,3,4,5}, +{5,0,5,1,2,3,4,5}, +{0,1,6,2,3,4,5,6}, +{5,5,0,1,2,3,4,5}, +{0,6,1,2,3,4,5,6}, +{6,0,1,2,3,4,5,6}, +{0,1,2,3,4,5,6,7}, +{1,1,1,1,1,1,1,0}, +{0,2,2,2,2,2,2,1}, +{2,0,2,2,2,2,2,1}, +{0,1,3,3,3,3,3,2}, +{2,2,0,2,2,2,2,1}, +{0,3,1,3,3,3,3,2}, +{3,0,1,3,3,3,3,2}, +{0,1,2,4,4,4,4,3}, +{2,2,2,0,2,2,2,1}, +{0,3,3,1,3,3,3,2}, +{3,0,3,1,3,3,3,2}, +{0,1,4,2,4,4,4,3}, +{3,3,0,1,3,3,3,2}, +{0,4,1,2,4,4,4,3}, +{4,0,1,2,4,4,4,3}, +{0,1,2,3,5,5,5,4}, +{2,2,2,2,0,2,2,1}, +{0,3,3,3,1,3,3,2}, +{3,0,3,3,1,3,3,2}, +{0,1,4,4,2,4,4,3}, +{3,3,0,3,1,3,3,2}, +{0,4,1,4,2,4,4,3}, +{4,0,1,4,2,4,4,3}, +{0,1,2,5,3,5,5,4}, +{3,3,3,0,1,3,3,2}, +{0,4,4,1,2,4,4,3}, +{4,0,4,1,2,4,4,3}, +{0,1,5,2,3,5,5,4}, +{4,4,0,1,2,4,4,3}, +{0,5,1,2,3,5,5,4}, +{5,0,1,2,3,5,5,4}, +{0,1,2,3,4,6,6,5}, +{2,2,2,2,2,0,2,1}, +{0,3,3,3,3,1,3,2}, +{3,0,3,3,3,1,3,2}, +{0,1,4,4,4,2,4,3}, +{3,3,0,3,3,1,3,2}, +{0,4,1,4,4,2,4,3}, +{4,0,1,4,4,2,4,3}, +{0,1,2,5,5,3,5,4}, +{3,3,3,0,3,1,3,2}, +{0,4,4,1,4,2,4,3}, +{4,0,4,1,4,2,4,3}, +{0,1,5,2,5,3,5,4}, +{4,4,0,1,4,2,4,3}, +{0,5,1,2,5,3,5,4}, +{5,0,1,2,5,3,5,4}, +{0,1,2,3,6,4,6,5}, +{3,3,3,3,0,1,3,2}, +{0,4,4,4,1,2,4,3}, +{4,0,4,4,1,2,4,3}, +{0,1,5,5,2,3,5,4}, +{4,4,0,4,1,2,4,3}, +{0,5,1,5,2,3,5,4}, +{5,0,1,5,2,3,5,4}, +{0,1,2,6,3,4,6,5}, +{4,4,4,0,1,2,4,3}, +{0,5,5,1,2,3,5,4}, +{5,0,5,1,2,3,5,4}, +{0,1,6,2,3,4,6,5}, +{5,5,0,1,2,3,5,4}, +{0,6,1,2,3,4,6,5}, +{6,0,1,2,3,4,6,5}, +{0,1,2,3,4,5,7,6}, +{2,2,2,2,2,2,0,1}, +{0,3,3,3,3,3,1,2}, +{3,0,3,3,3,3,1,2}, +{0,1,4,4,4,4,2,3}, +{3,3,0,3,3,3,1,2}, +{0,4,1,4,4,4,2,3}, +{4,0,1,4,4,4,2,3}, +{0,1,2,5,5,5,3,4}, +{3,3,3,0,3,3,1,2}, +{0,4,4,1,4,4,2,3}, +{4,0,4,1,4,4,2,3}, +{0,1,5,2,5,5,3,4}, +{4,4,0,1,4,4,2,3}, +{0,5,1,2,5,5,3,4}, +{5,0,1,2,5,5,3,4}, +{0,1,2,3,6,6,4,5}, +{3,3,3,3,0,3,1,2}, +{0,4,4,4,1,4,2,3}, +{4,0,4,4,1,4,2,3}, +{0,1,5,5,2,5,3,4}, +{4,4,0,4,1,4,2,3}, +{0,5,1,5,2,5,3,4}, +{5,0,1,5,2,5,3,4}, +{0,1,2,6,3,6,4,5}, +{4,4,4,0,1,4,2,3}, +{0,5,5,1,2,5,3,4}, +{5,0,5,1,2,5,3,4}, +{0,1,6,2,3,6,4,5}, +{5,5,0,1,2,5,3,4}, +{0,6,1,2,3,6,4,5}, +{6,0,1,2,3,6,4,5}, +{0,1,2,3,4,7,5,6}, +{3,3,3,3,3,0,1,2}, +{0,4,4,4,4,1,2,3}, +{4,0,4,4,4,1,2,3}, +{0,1,5,5,5,2,3,4}, +{4,4,0,4,4,1,2,3}, +{0,5,1,5,5,2,3,4}, +{5,0,1,5,5,2,3,4}, +{0,1,2,6,6,3,4,5}, +{4,4,4,0,4,1,2,3}, +{0,5,5,1,5,2,3,4}, +{5,0,5,1,5,2,3,4}, +{0,1,6,2,6,3,4,5}, +{5,5,0,1,5,2,3,4}, +{0,6,1,2,6,3,4,5}, +{6,0,1,2,6,3,4,5}, +{0,1,2,3,7,4,5,6}, +{4,4,4,4,0,1,2,3}, +{0,5,5,5,1,2,3,4}, +{5,0,5,5,1,2,3,4}, +{0,1,6,6,2,3,4,5}, +{5,5,0,5,1,2,3,4}, +{0,6,1,6,2,3,4,5}, +{6,0,1,6,2,3,4,5}, +{0,1,2,7,3,4,5,6}, +{5,5,5,0,1,2,3,4}, +{0,6,6,1,2,3,4,5}, +{6,0,6,1,2,3,4,5}, +{0,1,7,2,3,4,5,6}, +{6,6,0,1,2,3,4,5}, +{0,7,1,2,3,4,5,6}, +{7,0,1,2,3,4,5,6}, +{0,1,2,3,4,5,6,7} }; #define u2vmask(_m_,_tv_) _mm256_sllv_epi32(_mm256_set1_epi8(_m_), _tv_) #define mm256_maskz_expand_epi32(_m_, _v_) _mm256_permutevar8x32_epi32(_v_, _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(ctou64(permv[_m_]))) )