BitUnpack SIMD
This commit is contained in:
274
bitunpackv.c
274
bitunpackv.c
@ -1,5 +1,5 @@
|
||||
/**
|
||||
Copyright (C) powturbo 2013-2015
|
||||
Copyright (C) powturbo 2013-2016
|
||||
GPL v2 License
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
@ -21,127 +21,239 @@
|
||||
- twitter : https://twitter.com/powturbo
|
||||
- email : powturbo [_AT_] gmail [_DOT_] com
|
||||
**/
|
||||
// bitunpack_.h - "Integer Compression" Bit Packing
|
||||
// bitunpackv.c - "Integer Compression" SIMD Bit Packing
|
||||
#ifndef VSTO
|
||||
#include <stdio.h>
|
||||
|
||||
#include <strings.h>
|
||||
#include <emmintrin.h>
|
||||
#include "conf.h"
|
||||
#include "bitutil.h"
|
||||
|
||||
#include "bitunpack.h"
|
||||
|
||||
#define PAD8(__x) (((__x)+7)/8)
|
||||
|
||||
//-----------------------------------------------------------------------------------------------------------------
|
||||
#define VSTO(op, i, ov, parm) _mm_storeu_si128(op++, ov)
|
||||
//-----------------------------------------------------------------------------
|
||||
#define VSTO( _op_, _i_, ov, _parm_) _mm_storeu_si128(_op_++, ov)
|
||||
#define VSTO0(_op_, _i_, ov, _parm_) _mm_storeu_si128(_op_++, _parm_)
|
||||
#include "bitunpackv.c"
|
||||
|
||||
#include __FILE__
|
||||
#define BITUNBLKV32_0(ip, i, __op, __parm) {\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
_mm_storeu_si128(__op++, __parm);\
|
||||
#define BITUNBLKV32_0(ip, _i_, _op_, _parm_) {__m128i ov;\
|
||||
VSTO0(_op_, 0, ov, _parm_);\
|
||||
VSTO0(_op_, 1, ov, _parm_);\
|
||||
VSTO0(_op_, 2, ov, _parm_);\
|
||||
VSTO0(_op_, 3, ov, _parm_);\
|
||||
VSTO0(_op_, 4, ov, _parm_);\
|
||||
VSTO0(_op_, 5, ov, _parm_);\
|
||||
VSTO0(_op_, 6, ov, _parm_);\
|
||||
VSTO0(_op_, 7, ov, _parm_);\
|
||||
VSTO0(_op_, 8, ov, _parm_);\
|
||||
VSTO0(_op_, 9, ov, _parm_);\
|
||||
VSTO0(_op_, 10, ov, _parm_);\
|
||||
VSTO0(_op_, 11, ov, _parm_);\
|
||||
VSTO0(_op_, 12, ov, _parm_);\
|
||||
VSTO0(_op_, 13, ov, _parm_);\
|
||||
VSTO0(_op_, 14, ov, _parm_);\
|
||||
VSTO0(_op_, 15, ov, _parm_);\
|
||||
VSTO0(_op_, 16, ov, _parm_);\
|
||||
VSTO0(_op_, 17, ov, _parm_);\
|
||||
VSTO0(_op_, 18, ov, _parm_);\
|
||||
VSTO0(_op_, 19, ov, _parm_);\
|
||||
VSTO0(_op_, 20, ov, _parm_);\
|
||||
VSTO0(_op_, 21, ov, _parm_);\
|
||||
VSTO0(_op_, 22, ov, _parm_);\
|
||||
VSTO0(_op_, 23, ov, _parm_);\
|
||||
VSTO0(_op_, 24, ov, _parm_);\
|
||||
VSTO0(_op_, 25, ov, _parm_);\
|
||||
VSTO0(_op_, 26, ov, _parm_);\
|
||||
VSTO0(_op_, 27, ov, _parm_);\
|
||||
VSTO0(_op_, 28, ov, _parm_);\
|
||||
VSTO0(_op_, 29, ov, _parm_);\
|
||||
VSTO0(_op_, 30, ov, _parm_);\
|
||||
VSTO0(_op_, 31, ov, _parm_);\
|
||||
}
|
||||
#define BITUNPACK0(__parm) __parm = _mm_setzero_si128()
|
||||
#define BITUNPACK0(_parm_) _parm_ = _mm_setzero_si128()
|
||||
|
||||
unsigned char *bitunpackv32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b) { unsigned char *ip = in+PAD8(n*b); __m128i sv; BITUNPACKV32(in, n, b, out, sv); return ip; }
|
||||
unsigned char *bitunpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b) {
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
#undef BITUNPACK0
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef __SSSE3__
|
||||
#include <tmmintrin.h>
|
||||
static ALIGNED(char, shuffles[16][16], 16) = {
|
||||
#define _ 0x80
|
||||
{ _,_,_,_, _,_,_,_, _,_, _, _, _, _, _,_ },
|
||||
{ 0,1,2,3, _,_,_,_, _,_, _, _, _, _, _,_ },
|
||||
{ _,_,_,_, 0,1,2,3, _,_, _, _, _, _, _,_ },
|
||||
{ 0,1,2,3, 4,5,6,7, _,_, _, _, _, _, _,_ },
|
||||
{ _,_,_,_, _,_,_,_, 0,1, 2, 3, _, _, _,_ },
|
||||
{ 0,1,2,3, _,_,_,_, 4,5, 6, 7, _, _, _,_ },
|
||||
{ _,_,_,_, 0,1,2,3, 4,5, 6, 7, _, _, _,_ },
|
||||
{ 0,1,2,3, 4,5,6,7, 8,9,10,11, _, _, _,_ },
|
||||
{ _,_,_,_, _,_,_,_, _,_,_,_, 0, 1, 2, 3 },
|
||||
{ 0,1,2,3, _,_,_,_, _,_,_, _, 4, 5, 6, 7 },
|
||||
{ _,_,_,_, 0,1,2,3, _,_,_, _, 4, 5, 6, 7 },
|
||||
{ 0,1,2,3, 4,5,6,7, _,_, _, _, 8, 9,10,11 },
|
||||
{ _,_,_,_, _,_,_,_, 0,1, 2, 3, 4, 5, 6, 7 },
|
||||
{ 0,1,2,3, _,_,_,_, 4,5, 6, 7, 8, 9,10,11 },
|
||||
{ _,_,_,_, 0,1,2,3, 4,5, 6, 7, 8, 9,10,11 },
|
||||
{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 },
|
||||
#undef _
|
||||
};
|
||||
|
||||
//------------------------------------------------------
|
||||
#define VSTO( _op_, _i_, _ov_, _parm_) if(!((_i_) & 1)) m = (*bb) & 0xf;else m = (*bb++) >> 4; _mm_storeu_si128(_op_++, _mm_add_epi32(_ov_, _mm_shuffle_epi8(_mm_slli_epi32(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)shuffles[m]) ) )); pex += popcnt32(m)
|
||||
#define VSTO0(_op_, _i_, ov, _parm_) if(!((_i_) & 1)) m = (*bb) & 0xf;else m = (*bb++) >> 4; _mm_storeu_si128(_op_++, _mm_shuffle_epi8( _mm_loadu_si128((__m128i*)pex), _mm_load_si128((__m128i*)shuffles[m]) ) ); pex += popcnt32(m)
|
||||
#define BITUNPACK0(_parm_) //_parm_ = _mm_setzero_si128()
|
||||
#include "bitunpackv.c"
|
||||
|
||||
unsigned char *_bitunpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned b, unsigned *__restrict pex, unsigned char *bb) {
|
||||
const unsigned char *ip = in+PAD8(n*b); unsigned m;
|
||||
__m128i sv;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
#undef BITUNPACK0
|
||||
#endif
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
#define VSTO0(_op_, _i_, ov, _parm_) _mm_storeu_si128(_op_++, _parm_)
|
||||
#define VSTO(__op, i, __ov, __sv) __ov = UNZIGZAG128_32(__ov); SCAN128_32(__ov,__sv); _mm_storeu_si128(__op++, __sv)
|
||||
#include "bitunpackv.c"
|
||||
|
||||
#include __FILE__
|
||||
#define BITUNPACK0(__parm)
|
||||
#define BITUNPACK0(_parm_)
|
||||
|
||||
unsigned char *bitzunpackv32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { unsigned char *ip = in+PAD8(n*b);
|
||||
unsigned char *bitzunpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) {
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv = _mm_set1_epi32(start);
|
||||
BITUNPACKV32(in, n, b, out, sv); return ip;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef BITUNPACK0
|
||||
|
||||
//------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
#define VSTO(__op, i, __ov, __sv) SCAN128_32(__ov,__sv); _mm_storeu_si128(__op++, __sv)
|
||||
#include "bitunpackv.c"
|
||||
|
||||
#include __FILE__
|
||||
#define BITUNPACK0(__parm)
|
||||
#define BITUNPACK0(_parm_)
|
||||
|
||||
unsigned char *bitdunpackv32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { unsigned char *ip = in+PAD8(n*b);
|
||||
unsigned char *bitdunpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) {
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv = _mm_set1_epi32(start);
|
||||
BITUNPACKV32(in, n, b, out, sv); return ip;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef BITUNBLKV32_0
|
||||
#undef VSTO0
|
||||
#undef BITUNPACK0
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------------------------------------
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef __SSSE3__
|
||||
#define VEXP(_i_, _ov_) if(!((_i_) & 1)) m = (*bb) & 0xf;else m = (*bb++) >> 4; _ov_ = _mm_add_epi32(_ov_, _mm_shuffle_epi8(_mm_slli_epi32(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)shuffles[m]) ) ); pex += popcnt32(m)
|
||||
#define VSTO( _op_, _i_, _ov_, _sv_) VEXP( _i_, _ov_); SCAN128_32(_ov_,_sv_); _mm_storeu_si128(_op_++, _sv_);
|
||||
|
||||
#define VEXP0(_i_, _ov_) if(!((_i_) & 1)) m = (*bb) & 0xf;else m = (*bb++) >> 4; _ov_ = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)pex),_mm_load_si128((__m128i*)shuffles[m]) ); pex += popcnt32(m)
|
||||
#define VSTO0(_op_, _i_, _ov_, _sv_) VEXP0( _i_, _ov_); SCAN128_32(_ov_,_sv_); _mm_storeu_si128(_op_++, _sv_);
|
||||
|
||||
#include "bitunpackv.c"
|
||||
|
||||
#define BITUNPACK0(_parm_)
|
||||
|
||||
unsigned char *_bitdunpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb) {
|
||||
const unsigned char *ip = in+PAD8(n*b); unsigned m;
|
||||
__m128i sv = _mm_set1_epi32(start);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
#undef BITUNPACK0
|
||||
#endif
|
||||
//-----------------------------------------------------------------------------
|
||||
#define VSTO(__op, i, __ov, __sv) SCANI128_32(__ov,__sv,cv); _mm_storeu_si128(__op++, __sv);
|
||||
#define VSTO0(_op_, _i_, ov, _parm_) _mm_storeu_si128(_op_++, _parm_); _parm_ = _mm_add_epi32(_parm_, cv)
|
||||
#include "bitunpackv.c"
|
||||
|
||||
#include __FILE__
|
||||
#define BITUNBLKV32_0(ip, i, __op, __parm) {\
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
_mm_storeu_si128(__op++, __parm); __parm = _mm_add_epi32(__parm, cv); \
|
||||
}
|
||||
#define BITUNPACK0(__parm) __parm = _mm_add_epi32(__parm, cv); cv = _mm_set1_epi32(4)
|
||||
#define BITUNPACK0(_parm_) _parm_ = _mm_add_epi32(_parm_, cv); cv = _mm_set1_epi32(4)
|
||||
|
||||
unsigned char *bitd1unpackv32( unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) { unsigned char *ip = in+PAD8(n*b);
|
||||
unsigned char *bitd1unpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b) {
|
||||
const unsigned char *ip = in+PAD8(n*b);
|
||||
__m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(4,3,2,1);
|
||||
BITUNPACKV32(in, n, b, out, sv); return ip;
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef BITUNBLKV32_0
|
||||
#undef VSTO0
|
||||
#undef BITUNPACK0
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef __SSSE3__
|
||||
#define VEXP(_i_, _ov_) if(!((_i_) & 1)) m = (*bb) & 0xf;else m = (*bb++) >> 4; _ov_ = _mm_add_epi32(_ov_, _mm_shuffle_epi8(_mm_slli_epi32(_mm_loadu_si128((__m128i*)pex), b), _mm_load_si128((__m128i*)shuffles[m]) ) ); pex += popcnt32(m)
|
||||
#define VSTO( _op_, _i_, _ov_, _sv_) VEXP( _i_, _ov_); SCANI128_32(_ov_,_sv_,cv); _mm_storeu_si128(_op_++, _sv_);
|
||||
|
||||
#define VEXP0(_i_, _ov_) if(!((_i_) & 1)) m = (*bb) & 0xf;else m = (*bb++) >> 4; _ov_ = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)pex),_mm_load_si128((__m128i*)shuffles[m]) ); pex += popcnt32(m)
|
||||
#define VSTO0(_op_, _i_, _ov_, _sv_) VEXP0( _i_, _ov_); SCANI128_32(_ov_,_sv_,cv); _mm_storeu_si128(_op_++, _sv_);
|
||||
|
||||
#include "bitunpackv.c"
|
||||
|
||||
#define BITUNPACK0(_parm_) mv = _mm_set1_epi32(0) //_parm_ = _mm_setzero_si128()
|
||||
|
||||
unsigned char *_bitd1unpackv32( const unsigned char *__restrict in, unsigned n, unsigned *__restrict out, unsigned start, unsigned b, unsigned *__restrict pex, unsigned char *bb) {
|
||||
const unsigned char *ip = in+PAD8(n*b); unsigned m;
|
||||
__m128i sv = _mm_set1_epi32(start), cv = _mm_set_epi32(4,3,2,1);
|
||||
BITUNPACKV32(in, n, b, out, sv);
|
||||
return ip;
|
||||
}
|
||||
#undef VSTO
|
||||
#undef VSTO0
|
||||
#undef BITUNPACK0
|
||||
#endif
|
||||
|
||||
#else
|
||||
#include <strings.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "bitunpackv32_.h"
|
||||
|
||||
#define BITUNPACKV32(__ip, __n, __nbits, __op, __parm) { __m128i mv,*__ov=(__m128i *)__op, *__ove = (__m128i *)(__op + __n),*__iv=(__m128i *)__ip; \
|
||||
#define BITUNPACKV32(__ip, __n, __nbits, __op, _parm_) { __m128i mv,*_ov=(__m128i *)__op,*_iv=(__m128i *)__ip; \
|
||||
switch(__nbits&0x3f) {\
|
||||
case 0: { BITUNPACK0(__parm); do BITUNPACKV32_0( __iv, __ov, __parm) while(__ov<__ove); } break;\
|
||||
case 1: mv = _mm_set1_epi32((1u<< 1)-1); BITUNPACKV32_1( __iv, __ov, __parm); break;\
|
||||
case 2: mv = _mm_set1_epi32((1u<< 2)-1); BITUNPACKV32_2( __iv, __ov, __parm); break;\
|
||||
case 3: mv = _mm_set1_epi32((1u<< 3)-1); BITUNPACKV32_3( __iv, __ov, __parm); break;\
|
||||
case 4: mv = _mm_set1_epi32((1u<< 4)-1); BITUNPACKV32_4( __iv, __ov, __parm); break;\
|
||||
case 5: mv = _mm_set1_epi32((1u<< 5)-1); BITUNPACKV32_5( __iv, __ov, __parm); break;\
|
||||
case 6: mv = _mm_set1_epi32((1u<< 6)-1); BITUNPACKV32_6( __iv, __ov, __parm); break;\
|
||||
case 7: mv = _mm_set1_epi32((1u<< 7)-1); BITUNPACKV32_7( __iv, __ov, __parm); break;\
|
||||
case 8: mv = _mm_set1_epi32((1u<< 8)-1); BITUNPACKV32_8( __iv, __ov, __parm); break;\
|
||||
case 9: mv = _mm_set1_epi32((1u<< 9)-1); BITUNPACKV32_9( __iv, __ov, __parm); break;\
|
||||
case 10: mv = _mm_set1_epi32((1u<<10)-1); BITUNPACKV32_10(__iv, __ov, __parm); break;\
|
||||
case 11: mv = _mm_set1_epi32((1u<<11)-1); BITUNPACKV32_11(__iv, __ov, __parm); break;\
|
||||
case 12: mv = _mm_set1_epi32((1u<<12)-1); BITUNPACKV32_12(__iv, __ov, __parm); break;\
|
||||
case 13: mv = _mm_set1_epi32((1u<<13)-1); BITUNPACKV32_13(__iv, __ov, __parm); break;\
|
||||
case 14: mv = _mm_set1_epi32((1u<<14)-1); BITUNPACKV32_14(__iv, __ov, __parm); break;\
|
||||
case 15: mv = _mm_set1_epi32((1u<<15)-1); BITUNPACKV32_15(__iv, __ov, __parm); break;\
|
||||
case 16: mv = _mm_set1_epi32((1u<<16)-1); BITUNPACKV32_16(__iv, __ov, __parm); break;\
|
||||
case 17: mv = _mm_set1_epi32((1u<<17)-1); BITUNPACKV32_17(__iv, __ov, __parm); break;\
|
||||
case 18: mv = _mm_set1_epi32((1u<<18)-1); BITUNPACKV32_18(__iv, __ov, __parm); break;\
|
||||
case 19: mv = _mm_set1_epi32((1u<<19)-1); BITUNPACKV32_19(__iv, __ov, __parm); break;\
|
||||
case 20: mv = _mm_set1_epi32((1u<<20)-1); BITUNPACKV32_20(__iv, __ov, __parm); break;\
|
||||
case 21: mv = _mm_set1_epi32((1u<<21)-1); BITUNPACKV32_21(__iv, __ov, __parm); break;\
|
||||
case 22: mv = _mm_set1_epi32((1u<<22)-1); BITUNPACKV32_22(__iv, __ov, __parm); break;\
|
||||
case 23: mv = _mm_set1_epi32((1u<<23)-1); BITUNPACKV32_23(__iv, __ov, __parm); break;\
|
||||
case 24: mv = _mm_set1_epi32((1u<<24)-1); BITUNPACKV32_24(__iv, __ov, __parm); break;\
|
||||
case 25: mv = _mm_set1_epi32((1u<<25)-1); BITUNPACKV32_25(__iv, __ov, __parm); break;\
|
||||
case 26: mv = _mm_set1_epi32((1u<<26)-1); BITUNPACKV32_26(__iv, __ov, __parm); break;\
|
||||
case 27: mv = _mm_set1_epi32((1u<<27)-1); BITUNPACKV32_27(__iv, __ov, __parm); break;\
|
||||
case 28: mv = _mm_set1_epi32((1u<<28)-1); BITUNPACKV32_28(__iv, __ov, __parm); break;\
|
||||
case 29: mv = _mm_set1_epi32((1u<<29)-1); BITUNPACKV32_29(__iv, __ov, __parm); break;\
|
||||
case 30: mv = _mm_set1_epi32((1u<<30)-1); BITUNPACKV32_30(__iv, __ov, __parm); break;\
|
||||
case 31: mv = _mm_set1_epi32((1u<<31)-1); BITUNPACKV32_31(__iv, __ov, __parm); break;\
|
||||
case 32: mv = _mm_set1_epi32((1ull<<32)-1);BITUNPACKV32_32(__iv, __ov, __parm); break;\
|
||||
case 0: BITUNPACK0(_parm_); BITUNPACKV32_0( _iv, _ov, _parm_); break;\
|
||||
case 1: mv = _mm_set1_epi32((1u<< 1)-1); BITUNPACKV32_1( _iv, _ov, _parm_); break;\
|
||||
case 2: mv = _mm_set1_epi32((1u<< 2)-1); BITUNPACKV32_2( _iv, _ov, _parm_); break;\
|
||||
case 3: mv = _mm_set1_epi32((1u<< 3)-1); BITUNPACKV32_3( _iv, _ov, _parm_); break;\
|
||||
case 4: mv = _mm_set1_epi32((1u<< 4)-1); BITUNPACKV32_4( _iv, _ov, _parm_); break;\
|
||||
case 5: mv = _mm_set1_epi32((1u<< 5)-1); BITUNPACKV32_5( _iv, _ov, _parm_); break;\
|
||||
case 6: mv = _mm_set1_epi32((1u<< 6)-1); BITUNPACKV32_6( _iv, _ov, _parm_); break;\
|
||||
case 7: mv = _mm_set1_epi32((1u<< 7)-1); BITUNPACKV32_7( _iv, _ov, _parm_); break;\
|
||||
case 8: mv = _mm_set1_epi32((1u<< 8)-1); BITUNPACKV32_8( _iv, _ov, _parm_); break;\
|
||||
case 9: mv = _mm_set1_epi32((1u<< 9)-1); BITUNPACKV32_9( _iv, _ov, _parm_); break;\
|
||||
case 10: mv = _mm_set1_epi32((1u<<10)-1); BITUNPACKV32_10(_iv, _ov, _parm_); break;\
|
||||
case 11: mv = _mm_set1_epi32((1u<<11)-1); BITUNPACKV32_11(_iv, _ov, _parm_); break;\
|
||||
case 12: mv = _mm_set1_epi32((1u<<12)-1); BITUNPACKV32_12(_iv, _ov, _parm_); break;\
|
||||
case 13: mv = _mm_set1_epi32((1u<<13)-1); BITUNPACKV32_13(_iv, _ov, _parm_); break;\
|
||||
case 14: mv = _mm_set1_epi32((1u<<14)-1); BITUNPACKV32_14(_iv, _ov, _parm_); break;\
|
||||
case 15: mv = _mm_set1_epi32((1u<<15)-1); BITUNPACKV32_15(_iv, _ov, _parm_); break;\
|
||||
case 16: mv = _mm_set1_epi32((1u<<16)-1); BITUNPACKV32_16(_iv, _ov, _parm_); break;\
|
||||
case 17: mv = _mm_set1_epi32((1u<<17)-1); BITUNPACKV32_17(_iv, _ov, _parm_); break;\
|
||||
case 18: mv = _mm_set1_epi32((1u<<18)-1); BITUNPACKV32_18(_iv, _ov, _parm_); break;\
|
||||
case 19: mv = _mm_set1_epi32((1u<<19)-1); BITUNPACKV32_19(_iv, _ov, _parm_); break;\
|
||||
case 20: mv = _mm_set1_epi32((1u<<20)-1); BITUNPACKV32_20(_iv, _ov, _parm_); break;\
|
||||
case 21: mv = _mm_set1_epi32((1u<<21)-1); BITUNPACKV32_21(_iv, _ov, _parm_); break;\
|
||||
case 22: mv = _mm_set1_epi32((1u<<22)-1); BITUNPACKV32_22(_iv, _ov, _parm_); break;\
|
||||
case 23: mv = _mm_set1_epi32((1u<<23)-1); BITUNPACKV32_23(_iv, _ov, _parm_); break;\
|
||||
case 24: mv = _mm_set1_epi32((1u<<24)-1); BITUNPACKV32_24(_iv, _ov, _parm_); break;\
|
||||
case 25: mv = _mm_set1_epi32((1u<<25)-1); BITUNPACKV32_25(_iv, _ov, _parm_); break;\
|
||||
case 26: mv = _mm_set1_epi32((1u<<26)-1); BITUNPACKV32_26(_iv, _ov, _parm_); break;\
|
||||
case 27: mv = _mm_set1_epi32((1u<<27)-1); BITUNPACKV32_27(_iv, _ov, _parm_); break;\
|
||||
case 28: mv = _mm_set1_epi32((1u<<28)-1); BITUNPACKV32_28(_iv, _ov, _parm_); break;\
|
||||
case 29: mv = _mm_set1_epi32((1u<<29)-1); BITUNPACKV32_29(_iv, _ov, _parm_); break;\
|
||||
case 30: mv = _mm_set1_epi32((1u<<30)-1); BITUNPACKV32_30(_iv, _ov, _parm_); break;\
|
||||
case 31: mv = _mm_set1_epi32((1u<<31)-1); BITUNPACKV32_31(_iv, _ov, _parm_); break;\
|
||||
case 32: mv = _mm_set1_epi32((1ull<<32)-1);BITUNPACKV32_32(_iv, _ov, _parm_); break;\
|
||||
case 33 ... 63: break;\
|
||||
}\
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user